Note that the MASS package is required to run the CPASSOC. Unfortunately this clashes with the dplyrselect(). So be prepared to run detach("package:MASS", unload=TRUE) or use dplyr::select() to get some things to work if you’re moving back and forth throughout the code.
Show the code
library(tidyverse) # tidy coding, ggplot etclibrary(glue) # for coding within stringslibrary(bigsnpr) # to install: devtools::install_github("privefl/bigsnpr")library(pander) # for tableslibrary(brms) # for bayesian modelslibrary(ggtext) # for markdown syntax in ggplotlibrary(MetBrewer) # for more colour paletteslibrary(MoMAColors) # nicer colours once againlibrary(PNWColors) # further nicer colourslibrary(patchwork) # building composite plotslibrary(DT) # for nice tableslibrary(kableExtra) # for more nice tableslibrary(ggrepel) # for labelling ggplotslibrary(pheatmap) # for heat mapslibrary(MASS) # needed for CPASSOClibrary(Matrix) # needed for CPASSOC# build a helper function that produces a table to display our data# Create a function to build HTML searchable tablesmy_data_table <-function(df){datatable( df, rownames=FALSE,autoHideNavigation =TRUE,extensions =c("Scroller", "Buttons"),options =list(autoWidth =TRUE,dom ='Bfrtip',deferRender=TRUE,scrollX=TRUE, scrollY=1000,scrollCollapse=TRUE,buttons =list('pageLength', 'colvis', 'csv', list(extend ='pdf',pageSize ='A4',orientation ='landscape',filename ='Lifespan_data')),pageLength =100 ) )}
Load variant/gene annotations
DGRP variant annotations were downloaded from the DGRP website and gene annotations for all the genes covered by DGRP variants, from the org.Dm.eg.db database object from Bioconductor.
These will be useful later when we aim to identify whether variants with notable associations with a trait overlap with any genes.
Show the code
# Helper function to split a vector into chunks chunker <-function(x, max_chunk_size) split(x, ceiling(seq_along(x) / max_chunk_size))if(!file.exists("data/derived/annotations.csv")){# Load annotation file, get important info annot <-read.table("data/input/dgrp.fb557.annot.txt", header =FALSE, stringsAsFactors =FALSE) get.info <-function(rows){lapply(rows, function(row){ site.class.field <-strsplit(annot$V3[row], split ="]")[[1]][1] num.genes <-str_count(site.class.field, ";") +1 output <-cbind(rep(annot$V1[row], num.genes), do.call("rbind", lapply(strsplit(site.class.field, split =";")[[1]], function(x) strsplit(x, split ="[|]")[[1]])))if(ncol(output) ==5) return(output[,c(1,2,4,5)]) # only return SNPs that have some annotation. Don't get the gene symbolelsereturn(NULL) }) %>%do.call("rbind", .) } variant.details <-lapply(chunker(1:nrow(annot), max_chunk_size =10000), get.info) %>%do.call("rbind", .) %>%as.data.frame()names(variant.details) <-c("SNP", "FBID", "site.class", "distance.to.gene") variant.details$FBID <-unlist(str_extract_all(variant.details$FBID, "FBgn[:digit:]+")) # clean up text strings for Flybase ID variant.details %>% dplyr::filter(site.class !="FBgn0003638") %>%# NB this is a bug in the DGRP's annotation filemutate(chr =str_remove_all(substr(SNP, 1, 2), "_")) # get chromosome now for faster sorting later annotations <- variant.details} else annotations <-read_csv("data/derived/annotations.csv")annotations <- annotations %>%left_join(read.csv("data/Input/all_dmel_genes.csv")) %>% dplyr::select(SNP, FBID, site.class, distance.to.gene, gene_name, chromosome)
Loading data used in GWA tests
In the demographic component of this study, we calculated mean values and standard error for each combination of line, sex, study, temperature and mating status. These data are displayed, and can be downloaded from, the below table. Note that for GWA and other SNP based analysis, we removed lines that had not been genotyped (shown as Genotyped = NO). Lines with unknown genotypes also have unknown Wolbachia and inversions status’. Durham et al (2014) cleared all lines of Wolbachia via treatment with tetracycline.
Show the code
genotyped_lines <-read_csv("data/input/Genotyped_lines.csv") %>%mutate(Genotyped ="YES",line =as.factor(line))full_dataset <-read.csv("data/input/lifespan_data.csv") %>%as_tibble() %>%mutate(line =as.factor(Line),Treatment =str_replace(Treatment, " ", "_")) %>% dplyr::select(-Line) %>%left_join(genotyped_lines, by ="line") %>%mutate(Genotyped =if_else(is.na(Genotyped), "NO", Genotyped)) %>% dplyr::select(line, Sex, Temperature, Mated, Study, Treatment, Block, e0, SE_e0, h, SE_h, samp, Genotyped)# DGRP studies often correct for the most common inversions and wolbachia presence. inversions_wolbachia <-read_csv("data/Input/inversions_wolbachia.csv") %>%mutate(line =as.factor(str_remove(line, "DGRP_")),Wolbachia =if_else(Wolbachia =="y", 1, 0),across(2:17, ~case_when(.x =="ST"~0, .x =="INV/ST"~1, .x =="INV"~2))) %>% dplyr::select(line, `In(2L)t`, `In(2R)NS`, `In(3R)P`, `In(3R)K`, `In(3R)Mo`, Wolbachia) %>%rename(In_2L_t =`In(2L)t`,In_2R_NS =`In(2R)NS`,In_3R_P =`In(3R)P`,In_3R_K =`In(3R)K`,In_3R_Mo =`In(3R)Mo`)# inversions pruned to those Huang et al 2015 PNAS corrected forfull_dataset <- full_dataset %>%left_join(inversions_wolbachia) %>%mutate(Wolbachia =if_else(Study =="Durham_2014", 0, Wolbachia)) # study cleared wolbachia with tetracycline before phenotyping my_data_table(full_dataset %>%mutate(across(8:11, ~round(.x, 2))) %>% dplyr::select(1:13))
For GWAS and later CPASSOC, we split the data by study, removed studies that phenotyped < 100 lines and adjusted line means to account for experimental block where applicable. Importantly, we also split the Wilson et al (2020) data by dietary treatment; while we do not explicitly consider diet in our analysis, lifespan in one dietary treatment is considered a separate trait from lifespan measured in a second dietary treatment.
Show the code
Arya_2010_f <- full_dataset %>%filter(Study =="Arya_2010"& Sex =="Female"& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Arya_2010_m <- full_dataset %>%filter(Study =="Arya_2010"& Sex =="Male"& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_f_18 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Female"& Temperature ==18& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_m_18 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Male"& Temperature ==18& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_f_25 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Female"& Temperature ==25& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_m_25 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Male"& Temperature ==25& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_f_28 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Female"& Temperature ==28& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_m_28 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Male"& Temperature ==28& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)# In this study, some lines were measured twice per treatment, and a small subset were measured three times. We take the mean across blocks as the line mean, following the original study.Wilson_2020_f_1 <- full_dataset %>%filter(Treatment =="Wilson_2020_1"& Genotyped =="YES") %>%group_by(line) %>%mutate(e0 =mean(e0),h =mean(h)) %>%ungroup() %>%distinct(line, .keep_all =TRUE) %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Wilson_2020_f_2 <- full_dataset %>%filter(Treatment =="Wilson_2020_2"& Genotyped =="YES") %>%group_by(line) %>%mutate(e0 =mean(e0),h =mean(h)) %>%ungroup() %>%distinct(line, .keep_all =TRUE) %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Durham_2014_f <- full_dataset %>%filter(Study =="Durham_2014"& Genotyped =="YES") %>%mutate(Block =as.factor(Block))# statistically account for the effect of blockDurham_model_e0 <-brm(e0 ~1+ Block + (1|line),data = Durham_2014_f, family = gaussian,prior =c(prior(normal(50, 15), class = Intercept),prior(normal(0, 10), class = b),prior(exponential(1), class = sigma)),chains =4, cores =4, iter =6000, warmup =2000,seed =1, file ="fits/Durham_model_e0")Durham_model_h <-brm(h ~1+ Block + (1|line),data = Durham_2014_f, family = gaussian,prior =c(prior(exponential(1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),chains =4, cores =4, iter =6000, warmup =2000,seed =1, file ="fits/Durham_model_h")Durham_2014_f_adjusted <-as_draws_df(Durham_model_e0) %>% dplyr::select(b_Intercept, contains("r_line")) %>%# mutate each draw to show the lines standardised lifespan in Block 1 (denoted b_Intercept in brms output)mutate(across(2:177, ~ .x + b_Intercept)) %>% dplyr::select(-b_Intercept) %>%pivot_longer(cols =everything(), names_to ="line", values_to ="e0") %>%group_by(line) %>%summarise(e0 =mean(e0)) %>%mutate(line =str_remove(line, fixed("r_line[")),line =str_remove(line, fixed(",Intercept]"))) %>%left_join(as_draws_df(Durham_model_h) %>% dplyr::select(b_Intercept, contains("r_line")) %>%# mutate each draw to show the lines standardised lifespan in Block 1 (denoted b_Intercept in brms output)mutate(across(2:177, ~ .x + b_Intercept)) %>% dplyr::select(-b_Intercept) %>%pivot_longer(cols =everything(), names_to ="line", values_to ="h") %>%group_by(line) %>%summarise(h =mean(h)) %>%mutate(line =str_remove(line, fixed("r_line[")),line =str_remove(line, fixed(",Intercept]"))) ) %>%left_join(Durham_2014_f %>%distinct(line, .keep_all = T) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment)) %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h))Patel_2021_f <- full_dataset %>%filter(Study =="Patel_2021"& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)
Preparing for univariate GWAS
The preparation of data for GWAS generally follows Holman and Wong’s DGRP GWAS of fitness in different contexts. See their associated workflowrreport for a comprehensive breakdown of their analysis.
Install neccessary software and build helper functions
In addition to the R packages we load, plink 1.9 and beagle must also be installed. These software packages allow us to wrangle the data into the correct format and impute missing values, both of which are required to run GWAS with the Gemma R package (I’m still using PLINK for GWAS right now).
Show the code
# build functions to prepare data for GWASprep_for_e0_GWAS <-function(data, sex){data %>%mutate(line =glue("line{line}")) %>% dplyr::select(line, e0_scaled)}prep_for_h_GWAS <-function(data, sex){data %>%#inner_join(read_csv("data/Input/Genotyped_lines.csv"), by = "line") %>% # filter to lines that have been genotypedmutate(line =glue("line{line}")) %>% dplyr::select(line, h) %>%mutate(h =scale(h))}prep_for_SA_GWAS <-function(data){data %>% dplyr::select(line, SA_axis)}prep_for_SC_GWAS <-function(data){data %>% dplyr::select(line, SC_axis)}prep_for_ageing_GWAS <-function(data){ data %>%mutate(line =glue("line{line}")) %>% dplyr::select(line, ageing_axis)}prep_for_scaling_GWAS <-function(data){ data %>%mutate(line =glue("line{line}")) %>% dplyr::select(line, scaling_axis)}# I used bigsnpr::download_plink(dir = "code/windows") which puts it in the code folder - I'm using a windows operating system. The macOS version can also be downloaded into "code/macOS" # Beagle is a software package for phasing genotypes and imputing ungenotyped markers.plink <-paste(getwd(), "code/windows/plink", sep ="/") #windows#plink <- paste(getwd(), "code/macOS/plink", sep = "/") #macOS#beagle <- bigsnpr::download_beagle(# dir = "/Users/tkeaney/Library/CloudStorage/OneDrive-JGU/Postdoc/DGRP_lifespan/DGRP_lifespan_inequality/code/macOS") # only need to download this once - change path depending on operating system# helper function to pass commands to the terminal# Note that we set `intern = TRUE`, and pass the result of `system()` to `cat()`, # ensuring that the Terminal output will be printed in this knitr report.# # This is the mac OS function#run_command_mac <- function(shell_command, wd = getwd(), path = ""){# cat(system(glue("cd ", wd, path, # tell terminal which directory to work in# "\n",shell_command), # on a second terminal line, run the plink command# intern = TRUE), sep = '\n') #}# This is the windows function run_command_windows <-function(plink_command, wd =getwd(), path ="") {# Specify the full path to the plink executable within the 'code' subdirectory. plink_path <-paste(getwd(), "code/windows/plink", sep ="/")# Create the full shell command with the plink executable. command <-glue("cmd.exe /c cd /d {shQuote(file.path(wd, path))} && {shQuote(plink_path)} {plink_command}")# Execute the combined command. result <-system(command, intern =TRUE)# Print the result.cat(result, sep ='\n')# Return the result as a character vector.return(result)}# sometimes we need to run terminal commands without plink - create a slightly different function to do thisrun_command_no_plink <-function(shell_command, wd =getwd(), path ="") {# Create the full shell command with the plink executable. command <-glue("cmd.exe /c cd /d {shQuote(file.path(wd, path))} && {shell_command}")# Execute the combined command. result <-system(command, intern =TRUE)# Print the result.cat(result, sep ='\n')# Return the result as a character vector.return(result)}
Perform SNP quality control and impute missing data
Plink recognises three types of files that are necessary for GWA analysis: the .bed, .bim and .fam files.
.bed: the binary biallelic genotype table. Four options are possible:
00 = homozygous for minor allele
01 = missing genotype
10 = heterozygous genotype
11 = homozygous for major allele
The overwhelming majority of genotypes in the DGRP are homozygous for one of the alleles.
.bim: extended variant information file accompanying the .bed file. It has six fields:
chromosome code
variant identifier
position in morgans
base-pair coordinate
Minor allele
Major allele
.fam: Plink sample information file. It can have the following elements:
Family ID (‘FID’) (in our case just the DGRP line)
Within-family ID (‘IID’; cannot be ‘0’) (in our case just the DGRP line)
Within-family ID of father (‘0’ if father isn’t in dataset)
Within-family ID of mother (‘0’ if mother isn’t in dataset)
Sex code (‘1’ = male, ‘2’ = female, ‘0’ = unknown) - not important for us because we analyse the sexes separately.
Phenotype value (‘1’ = control, ‘2’ = case, ‘-9’/‘0’/non-numeric = missing data) - 9 for us because we supply the phenotypic data later
We cleaned up the DGRP’s .bed/.bim/.fam files (available from the Mackay lab website) by:
Removing any SNPs for which genotypes are missing for >10% of the 205 DGRP lines. We then use the software Beagle to impute the remaining missing genotypes. Imputation takes a long time so ideally, you only want to do it once.
Removing SNPs with a minor allele frequency of less than 5% across the 205 lines. We have negligible power to detect associations for rare SNPs below this threshold.
In the Plink-formatted genotype files, lines fixed for the major allele are coded as 2, and lines fixed for the minor allele as 0. This means that in the association tests we calculate, negative effect sizes mean that the minor allele is associated with lower trait values, while positive effect sizes means that the minor allele is associated with higher trait values.
Show the code
Run_function <-FALSE# Change this to TRUE to run - read through the code before you do this if(Run_function){# Use Plink to clean and subset the DGRP's SNP data as follows:# Only keep SNPs for which at least 90% of the 205 DGRP lines were successfully genotyped (--geno 0.1)# Only keep SNPs with a minor allele frequency of 0.05 or higher (--maf 0.05) across the 205 lines# Write the processed BIM/BED/FAM files to the data/Derived/plink_output directory output_directory <-paste(getwd(), "data/Derived/plink_output", sep ="/")run_command_windows(glue("--bfile dgrp2"," --geno 0.1 --maf 0.05 --allow-no-sex", " --make-bed --out {shQuote(output_directory)}/dgrp2_QC_all_lines"), path ="data/Input/bfiles/")# Use the shell command 'sed' to remove underscores from the DGRP line names in the .fam file (e.g. 'line_120' becomes 'line120')# Otherwise, these underscores cause trouble when we need to convert from PLINK to vcf format (vcf format uses underscore as a separator)# this works on mac but not on windowsfor(i in1:2) run_command_no_plink("sed -i '' 's/_//' dgrp2_QC_all_lines.fam", path ="/data/Derived/")# Now impute the missing genotypes using Beagle# This part uses the data for the full DGRP panel of >200 lines, to infer missing genotypes as accurately as possible. # The bigsnpr package provides a helpful wrapper for Beagle called snp_beagleImpute(): it translates to a VCF file and back again using PLINK# Imputation with the below optimisation took about an hour on my computer, which is not especially powerfulsnp_beagleImpute(beagle, plink, bedfile.in ="data/Derived/plink_output/dgrp2_QC_all_lines.bed", bedfile.out ="data/Derived/plink_output/dgrp2_QC_all_lines_imputed.bed",ncores =4, memory.max =16)# assign a sex of 'female' to all the DGRP lines (Beagle removes the sex, and it seems PLINK needs individuals to have a sex, # despite PLINK having a command called --allow-no-sex)run_command_windows("sed -i '' 's/ 0 0 0/ 0 0 2/' dgrp2_QC_all_lines_imputed.fam", path ="/data/Derived/plink_output/")# Re-write the .bed file, to make sure the MAF threshold and minor/major allele designations are correctly assigned post-Beaglerun_command_windows(glue("--bfile dgrp2_QC_all_lines_imputed"," --geno 0.1 --maf 0.05 --allow-no-sex", " --make-bed --out dgrp2_QC_all_lines_imputed_correct"), path ="/data/Derived/plink_output/")#unlink(list.files("data/derived", pattern = "~", full.names = TRUE)) # delete the original files, which were given a ~ file name by PLINK} else"Already run"
[1] "Already run"
Create a reduced list of LD-pruned SNPs with PLINK
To keep the computation time and memory usage manageable, we do not compute the effect sizes for every variant that passed the MAF and missingness quality control step above (1,646,661 variants). Instead, we estimate the effect sizes for a subset of variants that were approximately in linkage disequilibrium. We identified this LD-pruned set of variants using the PLINK arguments --indep-pairwise 100 10 0.2, which prunes within 100kB sliding windows, sliding 10 variants along with each step, and allows a maximum pairwise correlation (\(r^2\)) threshold of 0.2 between loci within the window. With these parameters, 1,419,902 variants were removed, leaving 226,770 check.
Show the code
# indep-pairwise arguments are: # 100kB window size, # variant count to shift the window by 10 variants at the end of each step, # pairwise r^2 threshold of 0.2if(!file.exists("data/Derived/plink_output/dgrp2_QC_all_lines_imputed_correct_pruned.prune.out")) {run_command_windows(glue("--bfile dgrp2_QC_all_lines_imputed_correct"," --indep-pairwise 100 10 0.2"), path ="data/Derived/plink_output/")}
Build GWAS function
Show the code
run_GWAS <-function(phenotypes){# Make a list of the lines in our sample and save as a text file for passing to PLINK lines_to_keep <- phenotypes %>% dplyr::select(line) %>%mutate(line_2 = line)write.table(lines_to_keep, row.names =FALSE, col.names =FALSE, file ="data/Derived/plink_output/lines_to_keep.txt", quote =FALSE)# Now cull the PLINK files to just the lines that we measured, and re-apply the # MAF cut-off of 0.05 for the new smaller sample of DGRP linesrun_command_windows(glue("--bfile dgrp2_QC_all_lines_imputed_correct"," --keep-allele-order", # force PLINK to retain the major/minor allele designations that apply to the DGRP as a whole" --keep lines_to_keep.txt --geno 0.1 --maf 0.05", " --make-bed --out dgrp2_QC_focal_lines"), path ="/data/Derived/plink_output/")# Define a function to add our phenotype data to a .fam file, which is needed for GWAS analysis and to make sure PLINK includes these samples# The 'phenotypes' data frame needs to have a column called 'line' add_phenotypes_to_fam <-function(filepath, phenotypes){read_delim(filepath, col_names =FALSE, delim =" ") %>% dplyr::select(X1, X2, X3, X4, X5) %>%# Get all the non-phenotype columnsleft_join(phenotypes, by =c("X1"="line")) %>%write.table(file ="data/Derived/plink_output/dgrp2_QC_focal_lines_NEW.fam", col.names =FALSE, row.names =FALSE, quote =FALSE, sep =" ")unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.fam")file.rename("data/Derived/plink_output/dgrp2_QC_focal_lines_NEW.fam", "data/Derived/plink_output/dgrp2_QC_focal_lines.fam") }# edit the new FAM file to add the phenotype data from 'phenotypes'add_phenotypes_to_fam("data/Derived/plink_output/dgrp2_QC_focal_lines.fam", phenotypes)# Run basic GWAS run_command_windows("--bfile dgrp2_QC_focal_lines --assoc --maf 0.05 --allow-no-sex", path ="/data/Derived/plink_output")# wrangle the GWAS results Focal_name <-deparse(substitute(phenotypes)) gwas_results <-read.table("data/Derived/plink_output/plink.qassoc", header =TRUE) %>% dplyr::select(SNP, BETA, SE, "T", P)# Rename and compress the GWAS summary stats file # The filter step means that only variants in the LD-pruned subset get saved to disk. gwas_results %>%# filter(SNP %in% (pull(read_tsv("data/Derived/plink_output/dgrp2_QC_all_lines_imputed_correct_pruned.prune.in", col_names = "SNP"), SNP))) %>% write_tsv(glue("data/Derived/GWAS_results/{Focal_name}.tsv.gz"))unlink("data/Derived/plink_output/plink.qassoc")# Rename the plink log filefile.rename("data/Derived/plink_output/plink.log",glue("data/Derived/plink_output/{Focal_name}_log.txt"))unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.bim")unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.bed")unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.fam")unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.log")}
Build a function to create manhattan plots for later
Show the code
build_manhattan_plot <-function(gwas_results){ manhattan_data <- gwas_results %>%mutate(position =str_split(SNP, "_"), # split the SNP name into logical bitschr =map_chr(position, ~ .x[1]), # the first bit is the chromosome arm - name the column appropriatelyposition =as.numeric(map_chr(position, ~ .x[2])), # where on the chromosome is the SNP foundpval =-1*log10(P)) %>%# make visualising the p values easier dplyr::select(chr, position, pval) %>%filter(chr !="4")# this next chunk finds convenient cuts for labels and colour changes max_pos <- manhattan_data %>%group_by(chr) %>%summarise(max_pos =max(position), middle_pos = (max_pos -min(position)) /2,.groups ="drop") %>%as.data.frame() max_pos$max_pos <-c(0, cumsum(max_pos$max_pos[1:4])) max_pos$label_pos <- max_pos$max_pos + max_pos$middle_pos# combine the two dataframes created above manhattan_data <- manhattan_data %>%left_join(max_pos, by ="chr") %>%mutate(position = position + max_pos,chromosome_colour =case_when(chr =="2L"| chr =="3L"| chr =="X"~"A",.default ="B"),Notable =if_else(pval >=-log10(1e-08), "YES", "NO")) plot <- manhattan_data %>%ggplot(aes(position, pval, group = chr, stroke =0.01)) +geom_point(aes(colour = chromosome_colour), size =1.6) +geom_hline(yintercept =-log10(1e-08), linetype =2, colour ="red", linewidth =1.2) +geom_hline(yintercept =-log10(1e-05), linetype =2, colour ="orange", linewidth =1.2) +scale_colour_manual(values =c(met.brewer(name ="Hokusai3")[3], met.brewer(name ="Hokusai3")[6])) +scale_x_continuous(breaks = max_pos$label_pos, labels = max_pos$chr) +scale_y_continuous(expand =c(0,0)) +ylab("-log~10~(_p_)") +xlab("Chromosome and position") +theme_classic() +theme(legend.position ="none",axis.title.y =element_markdown(size =18),axis.title.x =element_markdown(size =18),axis.text.x =element_text(size =15),axis.text.y =element_text(size =15)) }
Cross phenotype meta-analysis
The power to detect variants associated with correlated phenotypes can be increased if a meta-analytic approach is adopted (Zhu et al, 2018). Here, we use the CPASSOC approach developed by Zhou et al (2015), which evaluates the null hypothesis that SNPs are associated with none of the considered traits, weighted by the sample size of each study and adjusted for the trait correlation matrix. The steps to apply CPASSOC are as follows:
Estimate \(R\), the trait correlation matrix. In the DGRP, this can be done using SNP data or using line means.
GWAS each trait separately (see above).
Collate effect sizes for each trait into a vector \(\beta\), for each SNP.
Use a Wald test statistic to estimate a vector of p-values, \(T\), from the \(\beta\) and \(\sigma\) (which is approximated from the study sample size) estimates for each SNP.
Test whether \(\beta\) = 0. If the trait data are homogeneous (SNPs are expected to affect all traits in the same direction), use:
\[S_{Hom} = \frac{e^T(RW)^{-1}T(e^T(RW)^{-1}T)^T}{e^T(WRW)^{-1}e}\] where \(W\) is a diagonal matrix of weights for the individual test statistics (either the inverse of the variance or simply the sample size).
If there is heterogeneity between trait measures (i.e. it is a reasonable expectation that SNPs could affect some traits in one direction and others in the opposing direction), \(S_{Hom}\) is not appropriate. The ideal test statistic in this case would only include the cohorts and traits with a true contribution to the association of a genetic variant. To implement this, the value \(\tau\) is used as a threshold, below which traits are not included in the test statistic. This statistic, \(S_{Het}\) can be viewed as the maximum of the weighted sum of trait-specific test statistics satisfying different thresholds. To calculate \(S_{Het}\) first find,
\[S_{\tau} = \frac{e^T(R(\tau)W(\tau))^{-1}T(\tau)(e^T(R(\tau)W(\tau))^{-1}T(\tau))^T}{e^TW(\tau)^{-1}R(\tau)^{-1}W(\tau)^{-1}e}\] When \(\tau\) is large, \(S(\tau)\) can be undefined if the test statistic falls below \(\tau\) for all cohorts. In this case \(S(\tau) = 0\). Our test statistic is then
\[\displaystyle S_{Het} = \max_{\tau \gt 0} S(\tau)\] To generate p-values, \(S_{Het}\) is then compared with a gamma distribution of test-statistics.
Anyway, I’ve only included that for the mathematically inclined. Code to implement both statistics in R can be downloaded here, and is implemented below.
Table SX. Genotype to phenotype associations detected by univariate GWAS, for life expectancy. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.
Table SX. Genotype to phenotype associations detected by univariate GWAS, for lifespan equality. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.
We calculate the genetic correlations between traits using both the line mean and SNP effect size comparisons. We use the SNP correlations following Zhu et al (2015) and show these in the below plots. Note that while the two approaches produce results that have a strong positive correlation, there are small differences that might affect downstream analyses.
The purpose of this meta-analysis is to search for SNPs that have some effect on ageing, expressed in many different contexts (sexes, temperatures and mating status’).
To conduct CPASSOC for a given SNP, we need a summary statistic from each GWAS. A different number of lines were included in each GWAS, which can cause small differences in the number of SNPs assessed. We therefore prune the list of SNPs to those included in all univariate analyses. After pruning, 1,603,573 SNPs remain.
The Bonferroni adjusted significance threshold for this number of tests is \(p_{adj} = \frac{0.05}{1603573} = 3.12*10^{-8}\); here and for all future analysis, we use p\(< 10^{-8}\) as our significance threshold, as this is slightly more conservative and easier to quickly interpret.
Life expectancy
Show the code
Arya_f_l_T <- Arya_f_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_f = T)Huang_f_18_l_T <- Huang_f_18_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_18 = T)Huang_f_25_l_T <- Huang_f_25_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_25 = T)Huang_f_28_l_T <- Huang_f_28_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_28 = T)Wilson_f_l_1_T <- Wilson_f_l_1_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_25_1 = T)Wilson_f_l_2_T <- Wilson_f_l_2_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_25_2 = T)Durham_f_l_T <- Durham_f_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Durham_f_25 = T)Patel_f_l_T <- Patel_f_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Patel_f_23 = T)Arya_m_l_T <- Arya_m_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_m = T)Huang_m_18_l_T <- Huang_m_18_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_18 = T)Huang_m_25_l_T <- Huang_m_25_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_25 = T)Huang_m_28_l_T <- Huang_m_28_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_28 = T)all_e0_effect_sizes <- Arya_f_l_T %>%inner_join(Huang_f_18_l_T, by ="SNP") %>%inner_join(Huang_f_25_l_T, by ="SNP") %>%inner_join(Huang_f_28_l_T, by ="SNP") %>%inner_join(Wilson_f_l_1_T, by ="SNP") %>%inner_join(Wilson_f_l_2_T, by ="SNP") %>%inner_join(Durham_f_l_T, by ="SNP") %>%inner_join(Patel_f_l_T, by ="SNP") %>%inner_join(Arya_m_l_T, by ="SNP") %>%inner_join(Huang_m_18_l_T, by ="SNP") %>%inner_join(Huang_m_25_l_T, by ="SNP") %>%inner_join(Huang_m_28_l_T, by ="SNP")all_e0_effect_sizes_values <- all_e0_effect_sizes %>% dplyr::select(2:13)Sample_size_all <-c(165, 183, 186, 177, 161, 161, 176, 193, 165, 183, 186, 177) if(!file.exists("data/Derived/GWAS_results/all_e0_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(all_e0_effect_sizes_values, Sample_size_all, SNP_e0_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary (potentially in sign) for different traits e.g. if a SNP has a sex or enviornment opposite effect on lifespan)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_all, SNP_e0_corr_matrix);S_het <-SHet(all_e0_effect_sizes_values, Sample_size_all, SNP_e0_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)# bind meta statistics with the univariate effect sizesall_e0_meta_results <- all_e0_effect_sizes %>%bind_cols(p_S_hom, p_S_het) write_csv(all_e0_meta_results, "data/Derived/GWAS_results/all_e0_meta_results.csv")} else all_e0_meta_results <-read_csv("data/Derived/GWAS_results/all_e0_meta_results.csv")
Lifespan equality
Show the code
Arya_f_h_T <- Arya_f_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_f = T)Huang_f_18_h_T <- Huang_f_18_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_18 = T)Huang_f_25_h_T <- Huang_f_25_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_25 = T)Huang_f_28_h_T <- Huang_f_28_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_28 = T)Wilson_f_h_1_T <- Wilson_f_h_1_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_25_1 = T)Wilson_f_h_2_T <- Wilson_f_h_2_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_25_2 = T)Durham_f_h_T <- Durham_f_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Durham_f_25 = T)Patel_f_h_T <- Patel_f_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Patel_f_23 = T)Arya_m_h_T <- Arya_m_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_m = T)Huang_m_18_h_T <- Huang_m_18_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_18 = T)Huang_m_25_h_T <- Huang_m_25_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_25 = T)Huang_m_28_h_T <- Huang_m_28_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_28 = T)all_h_effect_sizes <- Arya_f_h_T %>%inner_join(Huang_f_18_h_T, by ="SNP") %>%inner_join(Huang_f_25_h_T, by ="SNP") %>%inner_join(Huang_f_28_h_T, by ="SNP") %>%inner_join(Wilson_f_h_1_T, by ="SNP") %>%inner_join(Wilson_f_h_2_T, by ="SNP") %>%inner_join(Durham_f_h_T, by ="SNP") %>%inner_join(Patel_f_h_T, by ="SNP") %>%inner_join(Arya_m_h_T, by ="SNP") %>%inner_join(Huang_m_18_h_T, by ="SNP") %>%inner_join(Huang_m_25_h_T, by ="SNP") %>%inner_join(Huang_m_28_h_T, by ="SNP") all_h_effect_sizes_values <- all_h_effect_sizes %>% dplyr::select(2:13)if(!file.exists("data/Derived/GWAS_results/all_h_meta_results.csv")) {S_hom <-SHom(all_h_effect_sizes_values, Sample_size_all, SNP_h_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary (potentially in sign) for different traits e.g. if a SNP has a sex or enviornment opposite effect on lifespan)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_all, SNP_h_corr_matrix);S_het <-SHet(all_h_effect_sizes_values, Sample_size_all, SNP_h_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)# bind meta statistics with the univariate effect sizesall_h_meta_results <- all_h_effect_sizes %>%bind_cols(p_S_hom, p_S_het)write_csv(all_h_meta_results, "data/Derived/GWAS_results/all_h_meta_results.csv")} else all_h_meta_results <-read_csv("data/Derived/GWAS_results/all_h_meta_results.csv")
Visualise the results
We combine GWAS summary statistics calculated from lifespan data measured across different contexts. It’s likely that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the S_het calculated p-values.
First lets show the effect of CPASSOC on the number of variants found to be associated with life expectancy and lifespan equality.
Table SX…th effect of CPASSOC on the number of variants detected with associations that exceed the significance threshold.
Table SX the variants that have significant associations with life expectancy.
Show the code
# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.life_expectancy_variants <- all_e0_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^18, 3)/10^18,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)life_expectancy_variants %>%my_data_table()
Table SX the variants that have significant associations with lifespan equality.
Show the code
# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.lifespan_equality_variants <- all_h_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^11, 3)/10^11,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)lifespan_equality_variants %>%my_data_table()
Now lets build some ‘Manhattan plots’ to show where these significant associations can be found:
Plot the univariate effect sizes for each of the SNPs associated with life expectancy / lifespan equality at the genome-wide significance threshold (p < \(0.05^{-8}\)) after CPASSOC.
Life expectancy
Show the code
SNP_heatmap_e0 <- all_e0_meta_results %>%filter(meta_p_het <1e-08) %>%arrange(meta_p_het) %>% dplyr::select(1:13) row_name <- SNP_heatmap_e0$SNPSNP_heatmap_e0 <- SNP_heatmap_e0 %>% dplyr::select(-SNP) %>%as.matrix()rownames(SNP_heatmap_e0) <- row_namebreaksList <-seq(-5, 5, by =0.1)annotation_SNPs <- all_e0_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP) %>%mutate(Chromosome =case_when(str_detect(SNP, "2L") ~"2L",str_detect(SNP, "2R") ~"2R",str_detect(SNP, "3L") ~"3L",str_detect(SNP, "3R") ~"3R",str_detect(SNP, "X") ~"X"))annotation_studies <-tibble(Study =c("Arya_f","Huang_f_18","Huang_f_25","Huang_f_28","Wilson_f_25_1","Wilson_f_25_2","Durham_f_25","Patel_f_23","Arya_m","Huang_m_18","Huang_m_25","Huang_m_28"),Temperature =c("25","18","25","28","25","25","25","23","25","18","25","28")) %>%mutate(Sex =case_when(str_detect(Study, "_f") ~"Female",.default ="Male"),Mating =case_when(str_detect(Study, "Arya") ~"NO",str_detect(Study, "Huang") ~"Throughout life",str_detect(Study, "Wilson") ~"Early life",str_detect(Study, "Durham") ~"Throughout life",str_detect(Study, "Patel") ~"Early life"),Author =str_extract(Study, ".*(?=\\_)"),Author =str_remove(Author, "_f"),Author =str_remove(Author, "_m"))# create a study annotation column, need this to be a data.frame rather than a tibble for some reason Study_details <- annotation_studies %>%as.data.frame() %>% dplyr::select(Study, Temperature, Mating)my_categories <-data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],Mating = Study_details[, 3])my_colors <-list(Temperature =c("18"="#7bbcd5", # sailboat colours from pnw"23"="#d0e2af","25"="#f5db99","28"="#e89c81"),Mating =c("NO"="#f8e3d1", # Shuksan from pnw"Early life"="#d7b1c5","Throughout life"="#ac8eab"),Chromosome =c("2L"="#d8aedd", # lake colours from pnw"2R"="#cb74ad","3L"="#11c2b5","3R"="#72e1e1","X"="#fbcc74"))# create a SNP annotation columnSNP_details <- annotation_SNPs %>%as.data.frame()my_SNP_categories <-data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])my_col_names <-c("Arya et al females", "Huang et al females", "Huang et al females","Huang et al females", "Wilson et al females 1", "Wilson et al females 2", "Durham et al females","Patel et al females", "Arya et al males", "Huang et al males", "Huang et al males","Huang et al males")pheatmap(SNP_heatmap_e0, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =6, cutree_cols =5, angle_col =45, border_color ="white",annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,fontsize =8, labels_col = my_col_names)
Plot the within-treatment intersex-genetic correlations
Show the code
pal <-pnw_palette("Shuksan2", 100)Arya_e0_sex_plot <- Arya_two_sexes_SNPS %>%ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female life expectancy", y ="Effect on male\nlife expectancy",title ="Arya et al 2010: 25C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Arya_h_sex_plot <- Arya_two_sexes_SNPS %>%ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female lifespan equality", y ="Effect on male\nlifespan equality",title ="Arya et al 2010: 25C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_18_e0_sex_plot <- Huang_18_two_sexes_SNPS %>%ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female life expectancy", y ="Effect on male\nlife expectancy",title ="Huang et al, 2020: 18C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_18_h_sex_plot <- Huang_18_two_sexes_SNPS %>%ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female lifespan equality", y ="Effect on male\nlifespan equality",title ="Huang et al, 2020: 18C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_25_e0_sex_plot <- Huang_25_two_sexes_SNPS %>%ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female life expectancy", y ="Effect on male\nlife expectancy",title ="Huang et al, 2020: 25C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_25_h_sex_plot <- Huang_25_two_sexes_SNPS %>%ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female lifespan equality", y ="Effect on male\nlifespan equality",title ="Huang et al, 2020: 25C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_28_e0_sex_plot <- Huang_28_two_sexes_SNPS %>%ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female life expectancy", y ="Effect on male\nlife expectancy",title ="Huang et al, 2020: 28C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_28_h_sex_plot <- Huang_28_two_sexes_SNPS %>%ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female lifespan equality", y ="Effect on male\nlifespan equality",title ="Huang et al, 2020: 28C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))(Arya_e0_sex_plot + Arya_h_sex_plot) / (Huang_18_e0_sex_plot + Huang_18_h_sex_plot) / (Huang_25_e0_sex_plot + Huang_25_h_sex_plot) / (Huang_28_e0_sex_plot + Huang_28_h_sex_plot)
Axis of antagonism and concordance
While there is a strong inter-sex genetic correlation for both life expectancy and lifespan equality, both correlations are significantly lower than one. Thus, it is likely that there are alleles that have sex-limited or sex-opposite effects on lifespan/lifespan equality. In an attempt to identify these genetic variants, we select those studies that measured lifespan in both sexes and calculate a sexual antagonism index for each line, following previous studies focused on similar questions (e.g. Berger et al, 2014, Grieshop and Arnqvist, 2018, Ruzicka et al, 2020). To create the index, we rotate the coordinate system of the female and male fitness plane by 45 degrees, by multiplying the 204 x 2 fitness matrix by the rotation matrix
Fig SX. Residual line means for female and male life expectancy. Points indicate a single genetic line. The sexual antagonism index ranges from male-beneficial and female-detrimental (orange points) to female-beneficial and male-detrimental (green points). The red dotted lines show the rotation that has been performed to create the antagonism and concordance axis.
How many variants do we find that have notable associations with the SA axis for life expectancy:
Table SX. Genotype to phenotype associations detected by univariate GWAS, for the life expectancy sexual antagonism axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.
Show the code
# filter down to sig associationsSA_e0_table <-bind_rows(SA_e0_Arya_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_e0_Arya_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_e0_Huang_18_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_e0_Huang_18_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_e0_Huang_25_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_e0_Huang_25_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_e0_Huang_28_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_e0_Huang_28_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?SA_p_05_SNPs_e0 <-bind_rows( SA_e0_Arya_GWAS %>%filter(P <1e-05), SA_e0_Huang_18_GWAS %>%filter(P <1e-05), SA_e0_Huang_25_GWAS %>%filter(P <1e-05), SA_e0_Huang_28_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()SA_e0_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= SA_p_05_SNPs_e0,`p < 1e-08 variants`=sum(SA_e0_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()
Study
Temperature
Mating status
p < 1e-05 variants
p < 1e-08 variants
Arya et al 2010
25
Virgin
13
0
Huang et al 2020
18
Mated
18
0
Huang et al 2020
25
Mated
305
1
Huang et al 2020
28
Mated
15
0
Totals
351
1
Table SX. Genotype to phenotype associations detected by univariate GWAS, for the life expectancy sexual concordance axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.
Show the code
# filter down to sig associationsSC_e0_table <-bind_rows(SC_e0_Arya_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_e0_Arya_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_e0_Huang_18_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_e0_Huang_18_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_e0_Huang_25_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_e0_Huang_25_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_e0_Huang_28_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_e0_Huang_28_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?SC_p_05_SNPs_e0 <-bind_rows( SC_e0_Arya_GWAS %>%filter(P <1e-05), SC_e0_Huang_18_GWAS %>%filter(P <1e-05), SC_e0_Huang_25_GWAS %>%filter(P <1e-05), SC_e0_Huang_28_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()SC_e0_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= SC_p_05_SNPs_e0,`p < 1e-08 variants`=sum(SC_e0_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()
Study
Temperature
Mating status
p < 1e-05 variants
p < 1e-08 variants
Arya et al 2010
25
Virgin
18
0
Huang et al 2020
18
Mated
24
0
Huang et al 2020
25
Mated
47
0
Huang et al 2020
28
Mated
31
0
Totals
119
0
Table SX. Genotype to phenotype associations detected by univariate GWAS, for the lifespan equality sexual antagonism axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.
Show the code
# filter down to sig associationsSA_h_table <-bind_rows(SA_h_Arya_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_h_Arya_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_h_Huang_18_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_h_Huang_18_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_h_Huang_25_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_h_Huang_25_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_h_Huang_28_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_h_Huang_28_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?SA_p_05_SNPs_h <-bind_rows( SA_h_Arya_GWAS %>%filter(P <1e-05), SA_h_Huang_18_GWAS %>%filter(P <1e-05), SA_h_Huang_25_GWAS %>%filter(P <1e-05), SA_h_Huang_28_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()SA_h_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= SA_p_05_SNPs_h,`p < 1e-08 variants`=sum(SA_h_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()
Study
Temperature
Mating status
p < 1e-05 variants
p < 1e-08 variants
Arya et al 2010
25
Virgin
1
0
Huang et al 2020
18
Mated
14
0
Huang et al 2020
25
Mated
20
0
Huang et al 2020
28
Mated
12
0
Totals
47
0
Table SX. Genotype to phenotype associations detected by univariate GWAS, for the lifespan equality sexual concordance axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.
Show the code
# filter down to sig associationsSC_h_table <-bind_rows(SC_h_Arya_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_h_Arya_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_h_Huang_18_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_h_Huang_18_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_h_Huang_25_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_h_Huang_25_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_h_Huang_28_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_h_Huang_28_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?SC_p_05_SNPs_h <-bind_rows( SC_h_Arya_GWAS %>%filter(P <1e-05), SC_h_Huang_18_GWAS %>%filter(P <1e-05), SC_h_Huang_25_GWAS %>%filter(P <1e-05), SC_h_Huang_28_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()SC_h_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= SC_p_05_SNPs_h,`p < 1e-08 variants`=sum(SC_h_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()
Study
Temperature
Mating status
p < 1e-05 variants
p < 1e-08 variants
Arya et al 2010
25
Virgin
22
0
Huang et al 2020
18
Mated
2
0
Huang et al 2020
25
Mated
37
0
Huang et al 2020
28
Mated
52
0
Totals
100
0
Run CPASSOC
Generate the genetic correlation matrix
We calculate the genetic correlations between traits using both the line mean and SNP effect size comparisons. We use the SNP correlations following Zhu et al (2015) and show these in the below plots. Note that while the two approaches produce results that have a strong positive correlation, there are small differences that will affect downstream analyses.
The purpose of these meta-analysis is to search for SNPs that have a) sex-specific and b) sexually concordant effects on ageing.
To conduct CPASSOC for a given SNP, we need a summary statistic from each GWAS. A different number of lines were included in each GWAS, which can cause small differences in the number of SNPs assessed. We therefore prune the list of SNPs to those included in all univariate analyses. After pruning, 1,619,501 SNPs remain.
Sex-antagonism and concordance for life expectancy
First run CPASSOC for the SA axis
Show the code
SA_Arya_l_T <- SA_e0_Arya_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya = T)SA_Huang_18_l_T <- SA_e0_Huang_18_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_18 = T)SA_Huang_25_l_T <- SA_e0_Huang_25_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_25 = T)SA_Huang_28_l_T <- SA_e0_Huang_28_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_28 = T)SA_e0_effect_sizes <- SA_Arya_l_T %>%inner_join(SA_Huang_18_l_T, by ="SNP") %>%inner_join(SA_Huang_25_l_T, by ="SNP") %>%inner_join(SA_Huang_28_l_T, by ="SNP") SA_e0_effect_size_values <- SA_e0_effect_sizes %>% dplyr::select(2:5)Sample_size_SA <-c(165, 183, 186, 177) if(!file.exists("data/Derived/GWAS_results/SA_e0_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(SA_e0_effect_size_values, Sample_size_SA, SNP_SA_e0_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_SA, SNP_SA_e0_corr_matrix);S_het <-SHet(SA_e0_effect_size_values, Sample_size_SA, SNP_SA_e0_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)SA_e0_meta_results <- SA_e0_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(SA_e0_meta_results, "data/Derived/GWAS_results/SA_e0_meta_results.csv")} else SA_e0_meta_results <-read_csv("data/Derived/GWAS_results/SA_e0_meta_results.csv")
Now run it for the SC axis
Show the code
SC_Arya_l_T <- SC_e0_Arya_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya = T)SC_Huang_18_l_T <- SC_e0_Huang_18_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_18 = T)SC_Huang_25_l_T <- SC_e0_Huang_25_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_25 = T)SC_Huang_28_l_T <- SC_e0_Huang_28_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_28 = T)SC_e0_effect_sizes <- SC_Arya_l_T %>%inner_join(SC_Huang_18_l_T, by ="SNP") %>%inner_join(SC_Huang_25_l_T, by ="SNP") %>%inner_join(SC_Huang_28_l_T, by ="SNP") SC_e0_effect_size_values <- SC_e0_effect_sizes %>% dplyr::select(2:5)Sample_size_SC <-c(165, 183, 186, 177) if(!file.exists("data/Derived/GWAS_results/SC_e0_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(SC_e0_effect_size_values, Sample_size_SC, SNP_SC_e0_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_SC, SNP_SC_e0_corr_matrix);S_het <-SHet(SC_e0_effect_size_values, Sample_size_SC, SNP_SC_e0_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)SC_e0_meta_results <- SC_e0_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(SC_e0_meta_results, "data/Derived/GWAS_results/SC_e0_meta_results.csv")} else SC_e0_meta_results <-read_csv("data/Derived/GWAS_results/SC_e0_meta_results.csv")
Sex-antagonism and concordance for lifespan equality
Sexual antagonism
Show the code
# lifespan equalitySA_Arya_h_T <- SA_h_Arya_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya = T)SA_Huang_18_h_T <- SA_h_Huang_18_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_18 = T)SA_Huang_25_h_T <- SA_h_Huang_25_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_25 = T)SA_Huang_28_h_T <- SA_h_Huang_28_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_28 = T)SA_h_effect_sizes <- SA_Arya_h_T %>%inner_join(SA_Huang_18_h_T, by ="SNP") %>%inner_join(SA_Huang_25_h_T, by ="SNP") %>%inner_join(SA_Huang_28_h_T, by ="SNP") SA_h_effect_size_values <- SA_h_effect_sizes %>% dplyr::select(2:5)if(!file.exists("data/Derived/GWAS_results/SA_h_meta_results.csv")) {S_hom <-SHom(SA_h_effect_size_values, Sample_size_SA, SNP_SA_h_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (probably not such a problem here given all traits we investigate are some version of life expectancy)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_SA, SNP_SA_h_corr_matrix);S_het <-SHet(SA_h_effect_size_values, Sample_size_SA, SNP_SA_h_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)SA_h_meta_results <- SA_h_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(SA_h_meta_results, "data/Derived/GWAS_results/SA_h_meta_results.csv")} else SA_h_meta_results <-read_csv("data/Derived/GWAS_results/SA_h_meta_results.csv")
Sexual concordance
Show the code
# lifespan equalitySC_Arya_h_T <- SC_h_Arya_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya = T)SC_Huang_18_h_T <- SC_h_Huang_18_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_18 = T)SC_Huang_25_h_T <- SC_h_Huang_25_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_25 = T)SC_Huang_28_h_T <- SC_h_Huang_28_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_28 = T)SC_h_effect_sizes <- SC_Arya_h_T %>%inner_join(SC_Huang_18_h_T, by ="SNP") %>%inner_join(SC_Huang_25_h_T, by ="SNP") %>%inner_join(SC_Huang_28_h_T, by ="SNP") SC_h_effect_size_values <- SC_h_effect_sizes %>% dplyr::select(2:5)if(!file.exists("data/Derived/GWAS_results/SC_h_meta_results.csv")) {S_hom <-SHom(SC_h_effect_size_values, Sample_size_SC, SNP_SC_h_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (probably not such a problem here given all traits we investigate are some version of life expectancy)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_SC, SNP_SC_h_corr_matrix);S_het <-SHet(SC_h_effect_size_values, Sample_size_SC, SNP_SC_h_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)SC_h_meta_results <- SC_h_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(SC_h_meta_results, "data/Derived/GWAS_results/SC_h_meta_results.csv")} else SC_h_meta_results <-read_csv("data/Derived/GWAS_results/SC_h_meta_results.csv")
Visualise the results
We combine GWAS summary statistics calculated from lifespan data measured across different contexts. It’s possible that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the S_het calculated p-values.
First lets show the effect of CPASSOC on the number of variants found to be associated with life expectancy and lifespan equality.
Table XX. Genotype to phenotype associations detected by univariate GWAS, for lifespan equality. The total row shows the number of unique candidate variants identified across all studies.
Table SX…variants that have sex-specific associations with life expectancy.
Show the code
# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.SA_e0_variants <- SA_e0_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^11, 2)/10^11,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)SA_e0_variants %>%my_data_table()
Table SX…variants that have sexually concordant associations with life expectancy.
Show the code
# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.SC_e0_variants <- SC_e0_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^18, 2)/10^18,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)SC_e0_variants %>%my_data_table()
Table SX…variants that have sexually concordant associations with life expectancy. There are no variants with strong sex-specific associations with lifespan equality.
Show the code
# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.SC_h_variants <- SC_h_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^8, 4)/10^8,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)SC_h_variants %>%my_data_table()
Now lets build some ‘Manhattan plots’ to show where these significant associations can be found:
Show the code
SA_e0_results <- SA_e0_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het) SC_e0_results <- SC_e0_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)SA_h_results <- SA_h_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)SC_h_results <- SC_h_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)# plot the results using the Manhattan plot custom function we defined earlierSA_e0_het_plot <-build_manhattan_plot(SA_e0_results) +labs(title ="SA axis for life expectancy") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 15)) +xlab(NULL)SC_e0_het_plot <-build_manhattan_plot(SC_e0_results) +labs(title ="SC axis for life expectancy") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 15))SA_h_het_plot <-build_manhattan_plot(SA_h_results) +labs(title ="SA axis for lifespan equality") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 15)) +xlab(NULL)SC_h_het_plot <-build_manhattan_plot(SC_h_results) +labs(title ="SC axis for lifespan equality") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 15))SA_e0_het_plot + SA_h_het_plot + SC_e0_het_plot + SC_h_het_plot +plot_layout(ncol =2)
Figure SX. xxx
Investigating the rate of ageing and the scaling of mortality risk
Creating axes of ageing rate and scaling
We’ve shown that perpendicular deviation from the regression of lifespan equality on life expectancy closely corresponds to the rate of ageing (\(b\)) parameter in a gompertz mortality model. To identify regions of the genome associated with the rate of ageing, we can calculate a rate of ageing index for each line in each treatment, by once again using rotation matrices. To create this index, we rotate the coordinate system of the life expectancy and lifespan equality plane by \(\theta\) degrees, where \(\theta\) is the angle between the positive x-axis and the regression of lifespan equality on life expectancy (mean centered and standarised to \(\sigma\) = 1).
With regression coefficients found, we can use the following formula to calculate the angle (in radians) between the mean regression line and the x-axis:
\(\theta = tan^{-1}(\beta)\)
where \(\beta\) is the point estimate of the slope from each model posterior distribution.
We then rotate the coordinate system of the life expectancy and lifespan equality plane clockwise by \(\theta\):
\[x' = xcos(\theta) - ysin(\theta),\]\[y' = xsin(\theta) + ycos(\theta),\] where \(x'\) and \(y'\) are the vectors of ageing rate and scaling, and \(x\) and \(y\) are life expectancy and lifespan equality.
Using SNP effect sizes, we calculate the genetic correlations between a) rates of ageing and b) scaling of mortality, measured in different environments.
Show the code
# use the BETA coefficients to build the SNP correlation matrix for the rate of ageingSNP_ageing_axis_data <-bind_rows( Arya_f_ageing_GWAS %>%mutate(Study ="Arya_2010", Temperature =25, Sex ="Female"), Arya_m_ageing_GWAS %>%mutate(Study ="Arya_2010", Temperature =25, Sex ="Male"), Huang_f_18_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =18, Sex ="Female"), Huang_m_18_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =18, Sex ="Male"), Huang_f_25_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =25, Sex ="Female"), Huang_m_25_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =25, Sex ="Male"), Huang_f_28_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =28, Sex ="Female"), Huang_m_28_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =28, Sex ="Male"), Wilson_f_ageing_1_GWAS %>%mutate(Study ="Wilson_2020_1", Temperature =25, Sex ="Female"), Wilson_f_ageing_2_GWAS %>%mutate(Study ="Wilson_2020_2", Temperature =25, Sex ="Female"), Durham_f_ageing_GWAS %>%mutate(Study ="Durham_2014", Temperature =25, Sex ="Female"), Patel_f_ageing_GWAS %>%mutate(Study ="Patel_2021", Temperature =23, Sex ="Female")) %>% dplyr::select(SNP, BETA, Study, Temperature, Sex) %>%pivot_wider(values_from = BETA, names_from =c(Study, Temperature, Sex)) %>% dplyr::select(-SNP) SNP_ageing_axis_corr_matrix <-cor(SNP_ageing_axis_data, use ="pairwise.complete.obs", method ="spearman")# use the BETA coefficients to build the SNP correlation matrix for scalingSNP_scaling_axis_data <-bind_rows( Arya_f_scaling_GWAS %>%mutate(Study ="Arya_2010", Temperature =25, Sex ="Female"), Arya_m_scaling_GWAS %>%mutate(Study ="Arya_2010", Temperature =25, Sex ="Male"), Huang_f_18_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =18, Sex ="Female"), Huang_m_18_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =18, Sex ="Male"), Huang_f_25_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =25, Sex ="Female"), Huang_m_25_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =25, Sex ="Male"), Huang_f_28_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =28, Sex ="Female"), Huang_m_28_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =28, Sex ="Male"), Wilson_f_scaling_1_GWAS %>%mutate(Study ="Wilson_2020_1", Temperature =25, Sex ="Female"), Wilson_f_scaling_2_GWAS %>%mutate(Study ="Wilson_2020_2", Temperature =25, Sex ="Female"), Durham_f_scaling_GWAS %>%mutate(Study ="Durham", Temperature =25, Sex ="Female"), Patel_f_scaling_GWAS %>%mutate(Study ="Patel", Temperature =23, Sex ="Female")) %>% dplyr::select(SNP, BETA, Study, Temperature, Sex) %>%pivot_wider(values_from = BETA, names_from =c(Study, Temperature, Sex)) %>% dplyr::select(-SNP) SNP_scaling_axis_corr_matrix <-cor(SNP_scaling_axis_data, use ="pairwise.complete.obs", method ="spearman")
Calculate meta-analytic test statistics
The purpose of these meta-analyses is to search for SNPs that affect 1) the rate of ageing and 2) the scaling of the risk of mortality.
Run CPASSOC for the rate of ageing
Show the code
# rate of ageingageing_axis_Arya_f_T <- Arya_f_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_f = T)ageing_axis_Arya_m_T <- Arya_m_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_m = T)ageing_axis_Huang_f_18_T <- Huang_f_18_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_18 = T)ageing_axis_Huang_m_18_T <- Huang_m_18_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_18 = T)ageing_axis_Huang_f_25_T <- Huang_f_25_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_25 = T)ageing_axis_Huang_m_25_T <- Huang_m_25_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_25 = T)ageing_axis_Huang_f_28_T <- Huang_f_28_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_28 = T)ageing_axis_Huang_m_28_T <- Huang_m_28_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_28 = T)ageing_axis_Wilson_f_1_T <- Wilson_f_ageing_1_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_1 = T)ageing_axis_Wilson_f_2_T <- Wilson_f_ageing_2_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_2 = T)ageing_axis_Durham_f_T <- Durham_f_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Durham_f = T)ageing_axis_Patel_f_T <- Patel_f_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Patel_f = T)ageing_axis_effect_sizes <- ageing_axis_Arya_f_T %>%inner_join(ageing_axis_Arya_m_T, by ="SNP") %>%inner_join(ageing_axis_Huang_f_18_T, by ="SNP") %>%inner_join(ageing_axis_Huang_m_18_T, by ="SNP") %>%inner_join(ageing_axis_Huang_f_25_T, by ="SNP") %>%inner_join(ageing_axis_Huang_m_25_T, by ="SNP") %>%inner_join(ageing_axis_Huang_f_28_T, by ="SNP") %>%inner_join(ageing_axis_Huang_m_28_T, by ="SNP") %>%inner_join(ageing_axis_Wilson_f_1_T, by ="SNP") %>%inner_join(ageing_axis_Wilson_f_2_T, by ="SNP") %>%inner_join(ageing_axis_Durham_f_T, by ="SNP") %>%inner_join(ageing_axis_Patel_f_T, by ="SNP") ageing_axis_effect_size_values <- ageing_axis_effect_sizes %>% dplyr::select(2:13)Sample_size_ageing_axis <-c(165, 165, 183, 183, 186, 186, 177, 177, 161, 161, 176, 193)if(!file.exists("data/Derived/GWAS_results/ageing_axis_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(ageing_axis_effect_size_values, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix);S_het <-SHet(ageing_axis_effect_size_values, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)ageing_axis_meta_results <- ageing_axis_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(ageing_axis_meta_results, "data/Derived/GWAS_results/ageing_axis_meta_results.csv")} else ageing_axis_meta_results <-read_csv("data/Derived/GWAS_results/ageing_axis_meta_results.csv")
Run CPASSOC for the scaling of mortality risk
Show the code
# scalingscaling_axis_Arya_f_T <- Arya_f_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_f = T)scaling_axis_Arya_m_T <- Arya_m_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_m = T)scaling_axis_Huang_f_18_T <- Huang_f_18_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_18 = T)scaling_axis_Huang_m_18_T <- Huang_m_18_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_18 = T)scaling_axis_Huang_f_25_T <- Huang_f_25_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_25 = T)scaling_axis_Huang_m_25_T <- Huang_m_25_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_25 = T)scaling_axis_Huang_f_28_T <- Huang_f_28_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_28 = T)scaling_axis_Huang_m_28_T <- Huang_m_28_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_28 = T)scaling_axis_Wilson_f_1_T <- Wilson_f_scaling_1_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_1 = T)scaling_axis_Wilson_f_2_T <- Wilson_f_scaling_2_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_2 = T)scaling_axis_Durham_f_T <- Durham_f_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Durham_f = T)scaling_axis_Patel_f_T <- Patel_f_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Patel_f = T)scaling_axis_effect_sizes <- scaling_axis_Arya_f_T %>%inner_join(scaling_axis_Arya_m_T, by ="SNP") %>%inner_join(scaling_axis_Huang_f_18_T, by ="SNP") %>%inner_join(scaling_axis_Huang_m_18_T, by ="SNP") %>%inner_join(scaling_axis_Huang_f_25_T, by ="SNP") %>%inner_join(scaling_axis_Huang_m_25_T, by ="SNP") %>%inner_join(scaling_axis_Huang_f_28_T, by ="SNP") %>%inner_join(scaling_axis_Huang_m_28_T, by ="SNP") %>%inner_join(scaling_axis_Wilson_f_1_T, by ="SNP") %>%inner_join(scaling_axis_Wilson_f_2_T, by ="SNP") %>%inner_join(scaling_axis_Durham_f_T, by ="SNP") %>%inner_join(scaling_axis_Patel_f_T, by ="SNP") scaling_axis_effect_size_values <- scaling_axis_effect_sizes %>% dplyr::select(2:13)Sample_size_scaling_axis <-c(165, 165, 183, 183, 186, 186, 177, 177, 161, 161, 176, 193)if(!file.exists("data/Derived/GWAS_results/scaling_axis_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(scaling_axis_effect_size_values, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (e.g. if a SNP has a sex or enviornment opposite effect on lifespan)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix);S_het <-SHet(scaling_axis_effect_size_values, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)scaling_axis_meta_results <- scaling_axis_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(scaling_axis_meta_results, "data/Derived/GWAS_results/scaling_axis_meta_results.csv")} else scaling_axis_meta_results <-read_csv("data/Derived/GWAS_results/scaling_axis_meta_results.csv")
Visualise the results
We combine GWAS summary statistics calculated from ageing and scaling axis data measured across different contexts. It’s possible that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the S_het calculated p-values.
First lets show the effect of CPASSOC on the number of variants found to be associated with the rate of ageing and the scaling of mortality risk.
Table XX. Genotype to phenotype associations detected by univariate GWAS, for the ageing rate and scaling of mortality axes. The total row shows the number of unique candidate variants identified across all studies.
Table SX…variants that affect our proxy for the rate of ageing.
Show the code
# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.ageing_rate_genes <- ageing_axis_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^8, 5)/10^8,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)ageing_rate_genes %>%my_data_table()
Table SX…variants that affect our proxy for scaling.
Now lets build some ‘Manhattan plots’ to show where these significant associations can be found:
Show the code
ageing_axis_results <- ageing_axis_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)scaling_axis_results <- scaling_axis_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)# plot the results using the manhattan plot custom function we defined earlierageing_axis_het_plot <-build_manhattan_plot(ageing_axis_results) +labs(title ="Ageing rate") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 12.5))scaling_axis_het_plot <-build_manhattan_plot(scaling_axis_results) +labs(title ="Scaling of mortality") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 12.5))ageing_axis_het_plot + scaling_axis_het_plot
Plot the univariate effect sizes for each of the SNPs associated with the rate of ageing at the genome-wide significance threshold (p < \(0.05^{-8}\)) after CPASSOC.
Show the code
SNP_heatmap_ageing_axis <- ageing_axis_meta_results %>%filter(meta_p_het <1e-08) %>%arrange(meta_p_het) %>% dplyr::select(1:13) row_name <- SNP_heatmap_ageing_axis$SNPSNP_heatmap_ageing_axis <- SNP_heatmap_ageing_axis %>% dplyr::select(-SNP) %>%as.matrix()rownames(SNP_heatmap_ageing_axis) <- row_namebreaksList <-seq(-5, 5, by =0.1)annotation_SNPs <- ageing_axis_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP) %>%mutate(Chromosome =case_when(str_detect(SNP, "2L") ~"2L",str_detect(SNP, "2R") ~"2R",str_detect(SNP, "3L") ~"3L",str_detect(SNP, "3R") ~"3R",str_detect(SNP, "X") ~"X"))annotation_studies <-tibble(Study =c("Arya_f","Arya_m","Huang_f_18","Huang_m_18","Huang_f_25","Huang_m_25","Huang_f_28","Huang_m_28","Wilson_f_1","Wilson_f_2","Durham_f_25","Patel_f_23"),Temperature =c("25","25","18","18","25","25","28","28","25","25","25","23")) %>%mutate(Sex =case_when(str_detect(Study, "_f") ~"Female",.default ="Male"),Mating =case_when(str_detect(Study, "Arya") ~"NO",str_detect(Study, "Huang") ~"Throughout life",str_detect(Study, "Wilson") ~"Early life",str_detect(Study, "Durham") ~"Throughout life",str_detect(Study, "Patel") ~"Early life"),Author =str_extract(Study, ".*(?=\\_)"),Author =str_remove(Author, "_f"),Author =str_remove(Author, "_m"))# create a study annotation column, need this to be a data.frame rather than a tibble for some reason Study_details <- annotation_studies %>%as.data.frame() %>% dplyr::select(Study, Temperature, Mating)my_categories <-data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],Mating = Study_details[, 3])my_colors <-list(Temperature =c("18"="#7bbcd5", # sailboat colours from pnw"23"="#d0e2af","25"="#f5db99","28"="#e89c81"),Mating =c("NO"="#f8e3d1", # Shuksan from pnw"Early life"="#d7b1c5","Throughout life"="#ac8eab"),Chromosome =c("2L"="#d8aedd", # lake colours from pnw"2R"="#cb74ad","3L"="#11c2b5","3R"="#72e1e1","X"="#fbcc74"))# create a SNP annotation columnSNP_details <- annotation_SNPs %>%as.data.frame()my_SNP_categories <-data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])my_col_names <-c("Arya et al females", "Arya et al males", "Huang et al females", "Huang et al males", "Huang et al females", "Huang et al males","Huang et al females", "Huang et al males", "Wilson et al females 1","Wilson et al females 2", "Durham et al females","Patel et al females")pheatmap(SNP_heatmap_ageing_axis, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =4, cutree_cols =4, angle_col =45, border_color ="white",annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,fontsize =8, labels_col = my_col_names)
Plot the univariate effect sizes for each of the SNPs associated with the scaling of mortality risk at the genome-wide significance threshold (p < \(0.05^{-8}\)) after CPASSOC.
Show the code
SNP_heatmap_scaling_axis <- scaling_axis_meta_results %>%filter(meta_p_het <1e-08) %>%arrange(meta_p_het) %>% dplyr::select(1:13) row_name <- SNP_heatmap_scaling_axis$SNPSNP_heatmap_scaling_axis <- SNP_heatmap_scaling_axis %>% dplyr::select(-SNP) %>%as.matrix()rownames(SNP_heatmap_scaling_axis) <- row_namebreaksList <-seq(-5, 5, by =0.1)annotation_SNPs <- scaling_axis_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP) %>%mutate(Chromosome =case_when(str_detect(SNP, "2L") ~"2L",str_detect(SNP, "2R") ~"2R",str_detect(SNP, "3L") ~"3L",str_detect(SNP, "3R") ~"3R",str_detect(SNP, "X") ~"X"))annotation_studies <-tibble(Study =c("Arya_f","Arya_m","Huang_f_18","Huang_m_18","Huang_f_25","Huang_m_25","Huang_f_28","Huang_m_28","Wilson_f_1","Wilson_f_2","Durham_f_25","Patel_f_23"),Temperature =c("25","25","18","18","25","25","28","28","25","25","25","23")) %>%mutate(Sex =case_when(str_detect(Study, "_f") ~"Female",.default ="Male"),Mating =case_when(str_detect(Study, "Arya") ~"NO",str_detect(Study, "Huang") ~"Throughout life",str_detect(Study, "Wilson") ~"Early life",str_detect(Study, "Durham") ~"Throughout life",str_detect(Study, "Patel") ~"Early life"),Author =str_extract(Study, ".*(?=\\_)"),Author =str_remove(Author, "_f"),Author =str_remove(Author, "_m"))# create a study annotation column, need this to be a data.frame rather than a tibble for some reason Study_details <- annotation_studies %>%as.data.frame() %>% dplyr::select(Study, Temperature, Mating)my_categories <-data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],Mating = Study_details[, 3])my_colors <-list(Temperature =c("18"="#7bbcd5", # sailboat colours from pnw"23"="#d0e2af","25"="#f5db99","28"="#e89c81"),Mating =c("NO"="#f8e3d1", # Shuksan from pnw"Early life"="#d7b1c5","Throughout life"="#ac8eab"),Chromosome =c("2L"="#d8aedd", # lake colours from pnw"2R"="#cb74ad","3L"="#11c2b5","3R"="#72e1e1","X"="#fbcc74"))# create a SNP annotation columnSNP_details <- annotation_SNPs %>%as.data.frame()my_SNP_categories <-data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])my_col_names <-c("Arya et al females", "Arya et al males", "Huang et al females", "Huang et al males", "Huang et al females", "Huang et al males","Huang et al females", "Huang et al males", "Wilson et al females 1","Wilson et al females 2", "Durham et al females","Patel et al females")pheatmap(SNP_heatmap_scaling_axis, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =5, cutree_cols =4, angle_col =45, border_color ="white",annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,fontsize =8, labels_col = my_col_names)
There are several genomic regions where we detect multiple variants with very similar to identical effect sizes, indicating linkage disequilibrium. In these cases, the variant with the true causal effect can’t be identified (assuming there is a true phenotypic effect).
Source Code
---title: "Genome wide analyses"format: htmleditor: sourceexecute: warning: false message: false---# Load packages and dataNote that the `MASS` package is required to run the CPASSOC. Unfortunately this clashes with the `dplyr``select()`. So be prepared to run `detach("package:MASS", unload=TRUE)` or use `dplyr::select()` to get some things to work if you're moving back and forth throughout the code.```{r}#| results: hidelibrary(tidyverse) # tidy coding, ggplot etclibrary(glue) # for coding within stringslibrary(bigsnpr) # to install: devtools::install_github("privefl/bigsnpr")library(pander) # for tableslibrary(brms) # for bayesian modelslibrary(ggtext) # for markdown syntax in ggplotlibrary(MetBrewer) # for more colour paletteslibrary(MoMAColors) # nicer colours once againlibrary(PNWColors) # further nicer colourslibrary(patchwork) # building composite plotslibrary(DT) # for nice tableslibrary(kableExtra) # for more nice tableslibrary(ggrepel) # for labelling ggplotslibrary(pheatmap) # for heat mapslibrary(MASS) # needed for CPASSOClibrary(Matrix) # needed for CPASSOC# build a helper function that produces a table to display our data# Create a function to build HTML searchable tablesmy_data_table <-function(df){datatable( df, rownames=FALSE,autoHideNavigation =TRUE,extensions =c("Scroller", "Buttons"),options =list(autoWidth =TRUE,dom ='Bfrtip',deferRender=TRUE,scrollX=TRUE, scrollY=1000,scrollCollapse=TRUE,buttons =list('pageLength', 'colvis', 'csv', list(extend ='pdf',pageSize ='A4',orientation ='landscape',filename ='Lifespan_data')),pageLength =100 ) )}```## Load variant/gene annotationsDGRP variant annotations were downloaded from the [DGRP website](http://dgrp2.gnets.ncsu.edu/data/website/dgrp.fb557.annot.txt) and gene annotations for all the genes covered by DGRP variants, from the org.Dm.eg.db database object from Bioconductor.These will be useful later when we aim to identify whether variants with notable associations with a trait overlap with any genes. ```{r}#| results: hide# Helper function to split a vector into chunks chunker <-function(x, max_chunk_size) split(x, ceiling(seq_along(x) / max_chunk_size))if(!file.exists("data/derived/annotations.csv")){# Load annotation file, get important info annot <-read.table("data/input/dgrp.fb557.annot.txt", header =FALSE, stringsAsFactors =FALSE) get.info <-function(rows){lapply(rows, function(row){ site.class.field <-strsplit(annot$V3[row], split ="]")[[1]][1] num.genes <-str_count(site.class.field, ";") +1 output <-cbind(rep(annot$V1[row], num.genes), do.call("rbind", lapply(strsplit(site.class.field, split =";")[[1]], function(x) strsplit(x, split ="[|]")[[1]])))if(ncol(output) ==5) return(output[,c(1,2,4,5)]) # only return SNPs that have some annotation. Don't get the gene symbolelsereturn(NULL) }) %>%do.call("rbind", .) } variant.details <-lapply(chunker(1:nrow(annot), max_chunk_size =10000), get.info) %>%do.call("rbind", .) %>%as.data.frame()names(variant.details) <-c("SNP", "FBID", "site.class", "distance.to.gene") variant.details$FBID <-unlist(str_extract_all(variant.details$FBID, "FBgn[:digit:]+")) # clean up text strings for Flybase ID variant.details %>% dplyr::filter(site.class !="FBgn0003638") %>%# NB this is a bug in the DGRP's annotation filemutate(chr =str_remove_all(substr(SNP, 1, 2), "_")) # get chromosome now for faster sorting later annotations <- variant.details} else annotations <-read_csv("data/derived/annotations.csv")annotations <- annotations %>%left_join(read.csv("data/Input/all_dmel_genes.csv")) %>% dplyr::select(SNP, FBID, site.class, distance.to.gene, gene_name, chromosome)```## Loading data used in GWA testsIn the `demographic component` of this study, we calculated mean values and standard error for each combination of line, sex, study, temperature and mating status. These data are displayed, and can be downloaded from, the below table. Note that for GWA and other SNP based analysis, we removed lines that had not been genotyped (shown as `Genotyped = NO`). Lines with unknown genotypes also have unknown Wolbachia and inversions status'. Durham et al (2014) cleared all lines of Wolbachia via treatment with tetracycline. ```{r}#| results: hidegenotyped_lines <-read_csv("data/input/Genotyped_lines.csv") %>%mutate(Genotyped ="YES",line =as.factor(line))full_dataset <-read.csv("data/input/lifespan_data.csv") %>%as_tibble() %>%mutate(line =as.factor(Line),Treatment =str_replace(Treatment, " ", "_")) %>% dplyr::select(-Line) %>%left_join(genotyped_lines, by ="line") %>%mutate(Genotyped =if_else(is.na(Genotyped), "NO", Genotyped)) %>% dplyr::select(line, Sex, Temperature, Mated, Study, Treatment, Block, e0, SE_e0, h, SE_h, samp, Genotyped)# DGRP studies often correct for the most common inversions and wolbachia presence. inversions_wolbachia <-read_csv("data/Input/inversions_wolbachia.csv") %>%mutate(line =as.factor(str_remove(line, "DGRP_")),Wolbachia =if_else(Wolbachia =="y", 1, 0),across(2:17, ~case_when(.x =="ST"~0, .x =="INV/ST"~1, .x =="INV"~2))) %>% dplyr::select(line, `In(2L)t`, `In(2R)NS`, `In(3R)P`, `In(3R)K`, `In(3R)Mo`, Wolbachia) %>%rename(In_2L_t =`In(2L)t`,In_2R_NS =`In(2R)NS`,In_3R_P =`In(3R)P`,In_3R_K =`In(3R)K`,In_3R_Mo =`In(3R)Mo`)# inversions pruned to those Huang et al 2015 PNAS corrected forfull_dataset <- full_dataset %>%left_join(inversions_wolbachia) %>%mutate(Wolbachia =if_else(Study =="Durham_2014", 0, Wolbachia)) # study cleared wolbachia with tetracycline before phenotyping my_data_table(full_dataset %>%mutate(across(8:11, ~round(.x, 2))) %>% dplyr::select(1:13))```For GWAS and later CPASSOC, we split the data by study, removed studies that phenotyped < 100 lines and adjusted line means to account for experimental block where applicable. Importantly, we also split the Wilson et al (2020) data by dietary treatment; while we do not explicitly consider diet in our analysis, lifespan in one dietary treatment is considered a separate trait from lifespan measured in a second dietary treatment.```{r}Arya_2010_f <- full_dataset %>%filter(Study =="Arya_2010"& Sex =="Female"& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Arya_2010_m <- full_dataset %>%filter(Study =="Arya_2010"& Sex =="Male"& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_f_18 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Female"& Temperature ==18& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_m_18 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Male"& Temperature ==18& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_f_25 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Female"& Temperature ==25& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_m_25 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Male"& Temperature ==25& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_f_28 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Female"& Temperature ==28& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Huang_2020_m_28 <- full_dataset %>%filter(Study =="Huang_2020"& Sex =="Male"& Temperature ==28& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)# In this study, some lines were measured twice per treatment, and a small subset were measured three times. We take the mean across blocks as the line mean, following the original study.Wilson_2020_f_1 <- full_dataset %>%filter(Treatment =="Wilson_2020_1"& Genotyped =="YES") %>%group_by(line) %>%mutate(e0 =mean(e0),h =mean(h)) %>%ungroup() %>%distinct(line, .keep_all =TRUE) %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Wilson_2020_f_2 <- full_dataset %>%filter(Treatment =="Wilson_2020_2"& Genotyped =="YES") %>%group_by(line) %>%mutate(e0 =mean(e0),h =mean(h)) %>%ungroup() %>%distinct(line, .keep_all =TRUE) %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)Durham_2014_f <- full_dataset %>%filter(Study =="Durham_2014"& Genotyped =="YES") %>%mutate(Block =as.factor(Block))# statistically account for the effect of blockDurham_model_e0 <-brm(e0 ~1+ Block + (1|line),data = Durham_2014_f, family = gaussian,prior =c(prior(normal(50, 15), class = Intercept),prior(normal(0, 10), class = b),prior(exponential(1), class = sigma)),chains =4, cores =4, iter =6000, warmup =2000,seed =1, file ="fits/Durham_model_e0")Durham_model_h <-brm(h ~1+ Block + (1|line),data = Durham_2014_f, family = gaussian,prior =c(prior(exponential(1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),chains =4, cores =4, iter =6000, warmup =2000,seed =1, file ="fits/Durham_model_h")Durham_2014_f_adjusted <-as_draws_df(Durham_model_e0) %>% dplyr::select(b_Intercept, contains("r_line")) %>%# mutate each draw to show the lines standardised lifespan in Block 1 (denoted b_Intercept in brms output)mutate(across(2:177, ~ .x + b_Intercept)) %>% dplyr::select(-b_Intercept) %>%pivot_longer(cols =everything(), names_to ="line", values_to ="e0") %>%group_by(line) %>%summarise(e0 =mean(e0)) %>%mutate(line =str_remove(line, fixed("r_line[")),line =str_remove(line, fixed(",Intercept]"))) %>%left_join(as_draws_df(Durham_model_h) %>% dplyr::select(b_Intercept, contains("r_line")) %>%# mutate each draw to show the lines standardised lifespan in Block 1 (denoted b_Intercept in brms output)mutate(across(2:177, ~ .x + b_Intercept)) %>% dplyr::select(-b_Intercept) %>%pivot_longer(cols =everything(), names_to ="line", values_to ="h") %>%group_by(line) %>%summarise(h =mean(h)) %>%mutate(line =str_remove(line, fixed("r_line[")),line =str_remove(line, fixed(",Intercept]"))) ) %>%left_join(Durham_2014_f %>%distinct(line, .keep_all = T) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment)) %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h))Patel_2021_f <- full_dataset %>%filter(Study =="Patel_2021"& Genotyped =="YES") %>%mutate(e0_scaled =scale(e0),h_scaled =scale(h)) %>% dplyr::select(line, Sex, Temperature, Mated, Treatment, e0, e0_scaled, h, h_scaled)```# Preparing for univariate GWASThe preparation of data for GWAS generally follows Holman and Wong's DGRP GWAS of fitness in different contexts. See their associated `workflowr`[report](https://lukeholman.github.io/fitnessGWAS/index.html) for a comprehensive breakdown of their analysis.## Install neccessary software and build helper functionsIn addition to the `R` packages we load, `plink 1.9` and `beagle` must also be installed. These software packages allow us to wrangle the data into the correct format and impute missing values, both of which are required to run GWAS with the `Gemma` R package (**I'm still using PLINK for GWAS right now**).```{r}# build functions to prepare data for GWASprep_for_e0_GWAS <-function(data, sex){data %>%mutate(line =glue("line{line}")) %>% dplyr::select(line, e0_scaled)}prep_for_h_GWAS <-function(data, sex){data %>%#inner_join(read_csv("data/Input/Genotyped_lines.csv"), by = "line") %>% # filter to lines that have been genotypedmutate(line =glue("line{line}")) %>% dplyr::select(line, h) %>%mutate(h =scale(h))}prep_for_SA_GWAS <-function(data){data %>% dplyr::select(line, SA_axis)}prep_for_SC_GWAS <-function(data){data %>% dplyr::select(line, SC_axis)}prep_for_ageing_GWAS <-function(data){ data %>%mutate(line =glue("line{line}")) %>% dplyr::select(line, ageing_axis)}prep_for_scaling_GWAS <-function(data){ data %>%mutate(line =glue("line{line}")) %>% dplyr::select(line, scaling_axis)}# I used bigsnpr::download_plink(dir = "code/windows") which puts it in the code folder - I'm using a windows operating system. The macOS version can also be downloaded into "code/macOS" # Beagle is a software package for phasing genotypes and imputing ungenotyped markers.plink <-paste(getwd(), "code/windows/plink", sep ="/") #windows#plink <- paste(getwd(), "code/macOS/plink", sep = "/") #macOS#beagle <- bigsnpr::download_beagle(# dir = "/Users/tkeaney/Library/CloudStorage/OneDrive-JGU/Postdoc/DGRP_lifespan/DGRP_lifespan_inequality/code/macOS") # only need to download this once - change path depending on operating system# helper function to pass commands to the terminal# Note that we set `intern = TRUE`, and pass the result of `system()` to `cat()`, # ensuring that the Terminal output will be printed in this knitr report.# # This is the mac OS function#run_command_mac <- function(shell_command, wd = getwd(), path = ""){# cat(system(glue("cd ", wd, path, # tell terminal which directory to work in# "\n",shell_command), # on a second terminal line, run the plink command# intern = TRUE), sep = '\n') #}# This is the windows function run_command_windows <-function(plink_command, wd =getwd(), path ="") {# Specify the full path to the plink executable within the 'code' subdirectory. plink_path <-paste(getwd(), "code/windows/plink", sep ="/")# Create the full shell command with the plink executable. command <-glue("cmd.exe /c cd /d {shQuote(file.path(wd, path))} && {shQuote(plink_path)} {plink_command}")# Execute the combined command. result <-system(command, intern =TRUE)# Print the result.cat(result, sep ='\n')# Return the result as a character vector.return(result)}# sometimes we need to run terminal commands without plink - create a slightly different function to do thisrun_command_no_plink <-function(shell_command, wd =getwd(), path ="") {# Create the full shell command with the plink executable. command <-glue("cmd.exe /c cd /d {shQuote(file.path(wd, path))} && {shell_command}")# Execute the combined command. result <-system(command, intern =TRUE)# Print the result.cat(result, sep ='\n')# Return the result as a character vector.return(result)}```## Perform SNP quality control and impute missing dataPlink recognises three types of files that are necessary for GWA analysis: the `.bed`, `.bim` and `.fam` files. `.bed`: the binary biallelic genotype table. Four options are possible: - 00 = homozygous for minor allele- 01 = missing genotype- 10 = heterozygous genotype- 11 = homozygous for major alleleThe overwhelming majority of genotypes in the DGRP are homozygous for one of the alleles.`.bim`: extended variant information file accompanying the `.bed` file. It has six fields:1. chromosome code2. variant identifier3. position in morgans4. base-pair coordinate5. Minor allele6. Major allele`.fam`: Plink sample information file. It can have the following elements:1. Family ID ('FID') (in our case just the DGRP line)2. Within-family ID ('IID'; cannot be '0') (in our case just the DGRP line)3. Within-family ID of father ('0' if father isn't in dataset)4. Within-family ID of mother ('0' if mother isn't in dataset)5. Sex code ('1' = male, '2' = female, '0' = unknown) - not important for us because we analyse the sexes separately.6. Phenotype value ('1' = control, '2' = case, '-9'/'0'/non-numeric = missing data) - 9 for us because we supply the phenotypic data laterWe cleaned up the DGRP’s .bed/.bim/.fam files (available from the Mackay lab [website](http://dgrp2.gnets.ncsu.edu/)) by:1. Removing any SNPs for which genotypes are missing for >10% of the 205 DGRP lines. We then use the software Beagle to impute the remaining missing genotypes. Imputation takes a long time so ideally, you only want to do it once.2. Removing SNPs with a minor allele frequency of less than 5% across the 205 lines. We have negligible power to detect associations for rare SNPs below this threshold.In the Plink-formatted genotype files, lines fixed for the major allele are coded as 2, and lines fixed for the minor allele as 0. This means that in the association tests we calculate, negative effect sizes mean that the minor allele is associated with lower trait values, while positive effect sizes means that the minor allele is associated with higher trait values.```{r}Run_function <-FALSE# Change this to TRUE to run - read through the code before you do this if(Run_function){# Use Plink to clean and subset the DGRP's SNP data as follows:# Only keep SNPs for which at least 90% of the 205 DGRP lines were successfully genotyped (--geno 0.1)# Only keep SNPs with a minor allele frequency of 0.05 or higher (--maf 0.05) across the 205 lines# Write the processed BIM/BED/FAM files to the data/Derived/plink_output directory output_directory <-paste(getwd(), "data/Derived/plink_output", sep ="/")run_command_windows(glue("--bfile dgrp2"," --geno 0.1 --maf 0.05 --allow-no-sex", " --make-bed --out {shQuote(output_directory)}/dgrp2_QC_all_lines"), path ="data/Input/bfiles/")# Use the shell command 'sed' to remove underscores from the DGRP line names in the .fam file (e.g. 'line_120' becomes 'line120')# Otherwise, these underscores cause trouble when we need to convert from PLINK to vcf format (vcf format uses underscore as a separator)# this works on mac but not on windowsfor(i in1:2) run_command_no_plink("sed -i '' 's/_//' dgrp2_QC_all_lines.fam", path ="/data/Derived/")# Now impute the missing genotypes using Beagle# This part uses the data for the full DGRP panel of >200 lines, to infer missing genotypes as accurately as possible. # The bigsnpr package provides a helpful wrapper for Beagle called snp_beagleImpute(): it translates to a VCF file and back again using PLINK# Imputation with the below optimisation took about an hour on my computer, which is not especially powerfulsnp_beagleImpute(beagle, plink, bedfile.in ="data/Derived/plink_output/dgrp2_QC_all_lines.bed", bedfile.out ="data/Derived/plink_output/dgrp2_QC_all_lines_imputed.bed",ncores =4, memory.max =16)# assign a sex of 'female' to all the DGRP lines (Beagle removes the sex, and it seems PLINK needs individuals to have a sex, # despite PLINK having a command called --allow-no-sex)run_command_windows("sed -i '' 's/ 0 0 0/ 0 0 2/' dgrp2_QC_all_lines_imputed.fam", path ="/data/Derived/plink_output/")# Re-write the .bed file, to make sure the MAF threshold and minor/major allele designations are correctly assigned post-Beaglerun_command_windows(glue("--bfile dgrp2_QC_all_lines_imputed"," --geno 0.1 --maf 0.05 --allow-no-sex", " --make-bed --out dgrp2_QC_all_lines_imputed_correct"), path ="/data/Derived/plink_output/")#unlink(list.files("data/derived", pattern = "~", full.names = TRUE)) # delete the original files, which were given a ~ file name by PLINK} else"Already run"```## Create a reduced list of LD-pruned SNPs with PLINKTo keep the computation time and memory usage manageable, we do not compute the effect sizes for every variant that passed the MAF and missingness quality control step above (1,646,661 variants). Instead, we estimate the effect sizes for a subset of variants that were approximately in linkage disequilibrium. We identified this LD-pruned set of variants using the PLINK arguments `--indep-pairwise 100 10 0.2`, which prunes within 100kB sliding windows, sliding 10 variants along with each step, and allows a maximum pairwise correlation ($r^2$) threshold of 0.2 between loci within the window. With these parameters, 1,419,902 variants were removed, leaving **226,770 check**. ```{r}# indep-pairwise arguments are: # 100kB window size, # variant count to shift the window by 10 variants at the end of each step, # pairwise r^2 threshold of 0.2if(!file.exists("data/Derived/plink_output/dgrp2_QC_all_lines_imputed_correct_pruned.prune.out")) {run_command_windows(glue("--bfile dgrp2_QC_all_lines_imputed_correct"," --indep-pairwise 100 10 0.2"), path ="data/Derived/plink_output/")}```## Build GWAS function```{r}run_GWAS <-function(phenotypes){# Make a list of the lines in our sample and save as a text file for passing to PLINK lines_to_keep <- phenotypes %>% dplyr::select(line) %>%mutate(line_2 = line)write.table(lines_to_keep, row.names =FALSE, col.names =FALSE, file ="data/Derived/plink_output/lines_to_keep.txt", quote =FALSE)# Now cull the PLINK files to just the lines that we measured, and re-apply the # MAF cut-off of 0.05 for the new smaller sample of DGRP linesrun_command_windows(glue("--bfile dgrp2_QC_all_lines_imputed_correct"," --keep-allele-order", # force PLINK to retain the major/minor allele designations that apply to the DGRP as a whole" --keep lines_to_keep.txt --geno 0.1 --maf 0.05", " --make-bed --out dgrp2_QC_focal_lines"), path ="/data/Derived/plink_output/")# Define a function to add our phenotype data to a .fam file, which is needed for GWAS analysis and to make sure PLINK includes these samples# The 'phenotypes' data frame needs to have a column called 'line' add_phenotypes_to_fam <-function(filepath, phenotypes){read_delim(filepath, col_names =FALSE, delim =" ") %>% dplyr::select(X1, X2, X3, X4, X5) %>%# Get all the non-phenotype columnsleft_join(phenotypes, by =c("X1"="line")) %>%write.table(file ="data/Derived/plink_output/dgrp2_QC_focal_lines_NEW.fam", col.names =FALSE, row.names =FALSE, quote =FALSE, sep =" ")unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.fam")file.rename("data/Derived/plink_output/dgrp2_QC_focal_lines_NEW.fam", "data/Derived/plink_output/dgrp2_QC_focal_lines.fam") }# edit the new FAM file to add the phenotype data from 'phenotypes'add_phenotypes_to_fam("data/Derived/plink_output/dgrp2_QC_focal_lines.fam", phenotypes)# Run basic GWAS run_command_windows("--bfile dgrp2_QC_focal_lines --assoc --maf 0.05 --allow-no-sex", path ="/data/Derived/plink_output")# wrangle the GWAS results Focal_name <-deparse(substitute(phenotypes)) gwas_results <-read.table("data/Derived/plink_output/plink.qassoc", header =TRUE) %>% dplyr::select(SNP, BETA, SE, "T", P)# Rename and compress the GWAS summary stats file # The filter step means that only variants in the LD-pruned subset get saved to disk. gwas_results %>%# filter(SNP %in% (pull(read_tsv("data/Derived/plink_output/dgrp2_QC_all_lines_imputed_correct_pruned.prune.in", col_names = "SNP"), SNP))) %>% write_tsv(glue("data/Derived/GWAS_results/{Focal_name}.tsv.gz"))unlink("data/Derived/plink_output/plink.qassoc")# Rename the plink log filefile.rename("data/Derived/plink_output/plink.log",glue("data/Derived/plink_output/{Focal_name}_log.txt"))unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.bim")unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.bed")unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.fam")unlink("data/Derived/plink_output/dgrp2_QC_focal_lines.log")} ```## Build a function to create manhattan plots for later```{r}build_manhattan_plot <-function(gwas_results){ manhattan_data <- gwas_results %>%mutate(position =str_split(SNP, "_"), # split the SNP name into logical bitschr =map_chr(position, ~ .x[1]), # the first bit is the chromosome arm - name the column appropriatelyposition =as.numeric(map_chr(position, ~ .x[2])), # where on the chromosome is the SNP foundpval =-1*log10(P)) %>%# make visualising the p values easier dplyr::select(chr, position, pval) %>%filter(chr !="4")# this next chunk finds convenient cuts for labels and colour changes max_pos <- manhattan_data %>%group_by(chr) %>%summarise(max_pos =max(position), middle_pos = (max_pos -min(position)) /2,.groups ="drop") %>%as.data.frame() max_pos$max_pos <-c(0, cumsum(max_pos$max_pos[1:4])) max_pos$label_pos <- max_pos$max_pos + max_pos$middle_pos# combine the two dataframes created above manhattan_data <- manhattan_data %>%left_join(max_pos, by ="chr") %>%mutate(position = position + max_pos,chromosome_colour =case_when(chr =="2L"| chr =="3L"| chr =="X"~"A",.default ="B"),Notable =if_else(pval >=-log10(1e-08), "YES", "NO")) plot <- manhattan_data %>%ggplot(aes(position, pval, group = chr, stroke =0.01)) +geom_point(aes(colour = chromosome_colour), size =1.6) +geom_hline(yintercept =-log10(1e-08), linetype =2, colour ="red", linewidth =1.2) +geom_hline(yintercept =-log10(1e-05), linetype =2, colour ="orange", linewidth =1.2) +scale_colour_manual(values =c(met.brewer(name ="Hokusai3")[3], met.brewer(name ="Hokusai3")[6])) +scale_x_continuous(breaks = max_pos$label_pos, labels = max_pos$chr) +scale_y_continuous(expand =c(0,0)) +ylab("-log~10~(_p_)") +xlab("Chromosome and position") +theme_classic() +theme(legend.position ="none",axis.title.y =element_markdown(size =18),axis.title.x =element_markdown(size =18),axis.text.x =element_text(size =15),axis.text.y =element_text(size =15)) }```# Cross phenotype meta-analysisThe power to detect variants associated with correlated phenotypes can be increased if a meta-analytic approach is adopted ([Zhu et al, 2018](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0193256)). Here, we use the `CPASSOC` approach developed by [Zhou _et al_ (2015)](https://www.sciencedirect.com/science/article/pii/S0002929714004777?via%3Dihub), which evaluates the null hypothesis that SNPs are associated with none of the considered traits, weighted by the sample size of each study and adjusted for the trait correlation matrix. The steps to apply `CPASSOC` are as follows:1. Estimate $R$, the trait correlation matrix. In the DGRP, this can be done using SNP data or using line means.2. GWAS each trait separately (see above).3. Collate effect sizes for each trait into a vector **$\beta$**, for each SNP.4. Use a Wald test statistic to estimate a vector of _p_-values, $T$, from the $\beta$ and $\sigma$ (which is approximated from the study sample size) estimates for each SNP.5. Test whether **$\beta$** = **0**. If the trait data are homogeneous (SNPs are expected to affect all traits in the same direction), use:$$S_{Hom} = \frac{e^T(RW)^{-1}T(e^T(RW)^{-1}T)^T}{e^T(WRW)^{-1}e}$$where $W$ is a diagonal matrix of weights for the individual test statistics (either the inverse of the variance or simply the sample size).6. If there is heterogeneity between trait measures (i.e. it is a reasonable expectation that SNPs could affect some traits in one direction and others in the opposing direction), $S_{Hom}$ is not appropriate. The ideal test statistic in this case would only include the cohorts and traits with a true contribution to the association of a genetic variant. To implement this, the value $\tau$ is used as a threshold, below which traits are not included in the test statistic. This statistic, $S_{Het}$ can be viewed as the maximum of the weighted sum of trait-specific test statistics satisfying different thresholds. To calculate $S_{Het}$ first find,$$S_{\tau} = \frac{e^T(R(\tau)W(\tau))^{-1}T(\tau)(e^T(R(\tau)W(\tau))^{-1}T(\tau))^T}{e^TW(\tau)^{-1}R(\tau)^{-1}W(\tau)^{-1}e}$$When $\tau$ is large, $S(\tau)$ can be undefined if the test statistic falls below $\tau$ for all cohorts. In this case $S(\tau) = 0$. Our test statistic is then$$\displaystyle S_{Het} = \max_{\tau \gt 0} S(\tau)$$To generate _p_-values, $S_{Het}$ is then compared with a gamma distribution of test-statistics.Anyway, I've only included that for the mathematically inclined. Code to implement both statistics in `R` can be downloaded [here](http://hal.case.edu/~xxz10/zhu-web/), and is implemented below.## The functionsThese are directly loaded from [here](http://hal.case.edu/~xxz10/zhu-web/)```{r}require(compiler)enableJIT(4)Non_Trucated_TestScore <-function(X, SampleSize, CorrMatrix){ Wi =matrix(SampleSize, nrow =1); sumW =sqrt(sum(Wi^2)); W = Wi / sumW; Sigma =ginv(CorrMatrix); XX =apply(X, 1, function(x) { x1 <-matrix(x, ncol =length(x), nrow =1); T = W %*% Sigma %*%t(x1); T = (T*T) / (W %*% Sigma %*%t(W));return(T[1,1]); } );return(XX);}SHom <-cmpfun(Non_Trucated_TestScore);Trucated_TestScore <-function(X, SampleSize, CorrMatrix, correct =1, startCutoff =0, endCutoff =1, CutoffStep =0.05, isAllpossible = T){ N =dim(X)[2]; Wi =matrix(SampleSize, nrow =1); sumW =sqrt(sum(Wi^2)); W = Wi / sumW; XX =apply(X, 1, function(x) { TTT =-1;if (isAllpossible == T ) { cutoff =sort(unique(abs(x))); ## it will filter out any of them. } else { cutoff =seq(startCutoff, endCutoff, CutoffStep); }for (threshold in cutoff) { x1 = x; index =which(abs(x1) < threshold);if (length(index) == N) break; A = CorrMatrix; W1 = W;if (length(index) !=0 ) { x1 = x1[-index]; A = A[-index, -index]; ## update the matrix W1 = W[-index]; }if (correct ==1) { index =which(x1 <0);if (length(index) !=0) { W1[index] =-W1[index]; ## update the sign } } A =ginv(A); x1 =matrix(x1, nrow =1); W1 =matrix(W1, nrow =1); T = W1 %*% A %*%t(x1); T = (T*T) / (W1 %*% A %*%t(W1));if (TTT < T[1,1]) TTT = T[1,1]; }return(TTT); } );return(XX);}SHet <-cmpfun(Trucated_TestScore);EstimateGamma <-function (N =1E6, SampleSize, CorrMatrix, correct =1, startCutoff =0, endCutoff =1, CutoffStep =0.05, isAllpossible = T) { Wi =matrix(SampleSize, nrow =1); sumW =sqrt(sum(Wi^2)); W = Wi / sumW; Permutation =mvrnorm(n = N, mu =c(rep(0, length(SampleSize))), Sigma = CorrMatrix, tol =1e-8, empirical = F); Stat =Trucated_TestScore(X = Permutation, SampleSize = SampleSize, CorrMatrix = CorrMatrix,correct = correct, startCutoff = startCutoff, endCutoff = endCutoff,CutoffStep = CutoffStep, isAllpossible = isAllpossible); a =min(Stat)*3/4 ex3 =mean(Stat*Stat*Stat) V =var(Stat);for (i in1:100){ E =mean(Stat)-a; k = E^2/V theta = V/E a = (-3*k*(k+1)*theta**2+sqrt(9*k**2*(k+1)**2*theta**4-12*k*theta*(k*(k+1)*(k+2)*theta**3-ex3)))/6/k/theta } para =c(k,theta,a);return(para);}EmpDist <-function (N =1E6, SampleSize, CorrMatrix, correct =1, startCutoff =0, endCutoff =1, CutoffStep =0.05, isAllpossible = T) { Wi =matrix(SampleSize, nrow =1); sumW =sqrt(sum(Wi^2)); W = Wi / sumW; Permutation =mvrnorm(n = N, mu =c(rep(0, length(SampleSize))), Sigma = CorrMatrix, tol =1e-8, empirical = F); Stat =Trucated_TestScore(X = Permutation, SampleSize = SampleSize, CorrMatrix = CorrMatrix, correct = correct, startCutoff = startCutoff, endCutoff = endCutoff, CutoffStep = CutoffStep, isAllpossible = isAllpossible);return(Stat);}```# Analysing life expectancy and lifespan equality## Univariate GWAS```{r}# prepare phenotype data for GWASArya_f_l <-prep_for_e0_GWAS(Arya_2010_f)Arya_m_l <-prep_for_e0_GWAS(Arya_2010_m)Arya_f_h <-prep_for_h_GWAS(Arya_2010_f)Arya_m_h <-prep_for_h_GWAS(Arya_2010_m)Huang_f_18_l <-prep_for_e0_GWAS(Huang_2020_f_18)Huang_f_18_h <-prep_for_h_GWAS(Huang_2020_f_18)Huang_m_18_l <-prep_for_e0_GWAS(Huang_2020_m_18)Huang_m_18_h <-prep_for_h_GWAS(Huang_2020_m_18)Huang_f_25_l <-prep_for_e0_GWAS(Huang_2020_f_25)Huang_f_25_h <-prep_for_h_GWAS(Huang_2020_f_25)Huang_m_25_l <-prep_for_e0_GWAS(Huang_2020_m_25)Huang_m_25_h <-prep_for_h_GWAS(Huang_2020_m_25)Huang_f_28_l <-prep_for_e0_GWAS(Huang_2020_f_28)Huang_f_28_h <-prep_for_h_GWAS(Huang_2020_f_28)Huang_m_28_l <-prep_for_e0_GWAS(Huang_2020_m_28)Huang_m_28_h <-prep_for_h_GWAS(Huang_2020_m_28)Wilson_f_l_1 <-prep_for_e0_GWAS(Wilson_2020_f_1)Wilson_f_h_1 <-prep_for_h_GWAS(Wilson_2020_f_1)Wilson_f_l_2 <-prep_for_e0_GWAS(Wilson_2020_f_2)Wilson_f_h_2 <-prep_for_h_GWAS(Wilson_2020_f_2)Durham_f_l <-prep_for_e0_GWAS(Durham_2014_f_adjusted)Durham_f_h <-prep_for_h_GWAS(Durham_2014_f_adjusted)Patel_f_l <-prep_for_e0_GWAS(Patel_2021_f)Patel_f_h <-prep_for_h_GWAS(Patel_2021_f)# if not already done, run the GWA testsif(!file.exists("data/Derived/GWAS_results/Arya_f_l.tsv.gz")) {run_GWAS(Arya_f_l)run_GWAS(Arya_m_l)run_GWAS(Arya_f_h)run_GWAS(Arya_m_h)run_GWAS(Huang_f_18_l)run_GWAS(Huang_f_18_h)run_GWAS(Huang_m_18_l)run_GWAS(Huang_m_18_h)run_GWAS(Huang_f_25_l)run_GWAS(Huang_f_25_h)run_GWAS(Huang_m_25_l)run_GWAS(Huang_m_25_h)run_GWAS(Huang_f_28_l)run_GWAS(Huang_f_28_h)run_GWAS(Huang_m_28_l)run_GWAS(Huang_m_28_h)run_GWAS(Wilson_f_l_1)run_GWAS(Wilson_f_h_1)run_GWAS(Wilson_f_l_2)run_GWAS(Wilson_f_h_2)run_GWAS(Durham_f_l)run_GWAS(Durham_f_h)run_GWAS(Patel_f_l)run_GWAS(Patel_f_h)}```As a point of comparison, we find the sum of significant associations detected by univariate GWAS```{r}# load GWAS results# Life expectancyArya_f_l_GWAS <-read_tsv("data/Derived/GWAS_results/Arya_f_l.tsv.gz") Huang_f_18_l_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_18_l.tsv.gz")Huang_f_25_l_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_25_l.tsv.gz") Huang_f_28_l_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_28_l.tsv.gz")Wilson_f_l_1_GWAS <-read_tsv("data/Derived/GWAS_results/Wilson_f_l_1.tsv.gz") Wilson_f_l_2_GWAS <-read_tsv("data/Derived/GWAS_results/Wilson_f_l_2.tsv.gz") Durham_f_l_GWAS <-read_tsv("data/Derived/GWAS_results/Durham_f_l.tsv.gz")Patel_f_l_GWAS <-read_tsv("data/Derived/GWAS_results/Patel_f_l.tsv.gz")Arya_m_l_GWAS <-read_tsv("data/Derived/GWAS_results/Arya_m_l.tsv.gz")Huang_m_18_l_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_18_l.tsv.gz")Huang_m_25_l_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_25_l.tsv.gz")Huang_m_28_l_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_28_l.tsv.gz")# Lifespan equalityArya_f_h_GWAS <-read_tsv("data/Derived/GWAS_results/Arya_f_h.tsv.gz")Huang_f_18_h_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_18_h.tsv.gz")Huang_f_25_h_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_25_h.tsv.gz") Huang_f_28_h_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_28_h.tsv.gz")Wilson_f_h_1_GWAS <-read_tsv("data/Derived/GWAS_results/Wilson_f_h_1.tsv.gz")Wilson_f_h_2_GWAS <-read_tsv("data/Derived/GWAS_results/Wilson_f_h_2.tsv.gz")Durham_f_h_GWAS <-read_tsv("data/Derived/GWAS_results/Durham_f_h.tsv.gz")Patel_f_h_GWAS <-read_tsv("data/Derived/GWAS_results/Patel_f_h.tsv.gz")Arya_m_h_GWAS <-read_tsv("data/Derived/GWAS_results/Arya_m_h.tsv.gz")Huang_m_18_h_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_18_h.tsv.gz")Huang_m_25_h_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_25_h.tsv.gz")Huang_m_28_h_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_28_h.tsv.gz")```**Table SX**. Genotype to phenotype associations detected by univariate GWAS, for **life expectancy**. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis. ```{r}# filter down to sig associationse0_table <-bind_rows( Arya_f_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Arya_f_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Treatment ="1",Sex ="Female",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_f_18_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_18_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Female",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_f_25_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_25_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_f_28_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_28_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Female",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Wilson_f_l_1_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Wilson_f_l_1_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Wilson et al 2020",Treatment ="1",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Wilson_f_l_2_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Wilson_f_l_2_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Wilson et al 2020*",Treatment ="2",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Durham_f_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Durham_f_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Durham et al 2014",Treatment ="1",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Patel_f_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Patel_f_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Patel et al 2021",Treatment ="1",Sex ="Female",Temperature ="23",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Arya_m_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Arya_m_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Treatment ="1",Sex ="Male",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_m_18_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_18_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Male",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_m_25_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_25_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Male",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_m_28_l_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_28_l_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Male",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`) ) # how many unique variants have been detected?p_05_SNPs <-bind_rows( Arya_f_l_GWAS %>%filter(P <1e-05), Arya_m_l_GWAS %>%filter(P <1e-05), Huang_f_18_l_GWAS %>%filter(P <1e-05), Huang_f_25_l_GWAS %>%filter(P <1e-05), Huang_f_28_l_GWAS %>%filter(P <1e-05), Huang_m_18_l_GWAS %>%filter(P <1e-05), Huang_m_25_l_GWAS %>%filter(P <1e-05), Huang_m_28_l_GWAS %>%filter(P <1e-05), Wilson_f_l_1_GWAS %>%filter(P <1e-05), Wilson_f_l_2_GWAS %>%filter(P <1e-05), Durham_f_l_GWAS %>%filter(P <1e-05), Patel_f_l_GWAS %>%filter(P <1e-05) ) %>%distinct(SNP) %>%nrow()e0_table %>%add_row(Study ="Totals",Sex ="",Temperature ="",`Mating status`="",`p < 1e-05 variants`= p_05_SNPs,`p < 1e-08 variants`=sum(e0_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()```**Table SX**. Genotype to phenotype associations detected by univariate GWAS, for **lifespan equality**. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis. ```{r}# filter down to sig associationsh_table <-bind_rows( Arya_f_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Arya_f_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Treatment ="1",Sex ="Female",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_f_18_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_18_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Female",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_f_25_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_25_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_f_28_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_28_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Female",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Wilson_f_h_1_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Wilson_f_h_1_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Wilson et al 2020",Treatment ="1",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Wilson_f_h_2_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Wilson_f_h_2_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Wilson et al 2020*",Treatment ="2",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Durham_f_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Durham_f_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Durham et al 2014",Treatment ="1",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Patel_f_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Patel_f_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Patel et al 2021",Treatment ="1",Sex ="Female",Temperature ="23",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Arya_m_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Arya_m_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Treatment ="1",Sex ="Male",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_m_18_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_18_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Male",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_m_25_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_25_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Male",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`), Huang_m_28_h_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_28_h_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Treatment ="1",Sex ="Male",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`) ) # how many unique variants have been detected?p_05_SNPs_h <-bind_rows( Arya_f_h_GWAS %>%filter(P <1e-05), Arya_m_h_GWAS %>%filter(P <1e-05), Huang_f_18_h_GWAS %>%filter(P <1e-05), Huang_f_25_h_GWAS %>%filter(P <1e-05), Huang_f_28_h_GWAS %>%filter(P <1e-05), Huang_m_18_h_GWAS %>%filter(P <1e-05), Huang_m_25_h_GWAS %>%filter(P <1e-05), Huang_m_28_h_GWAS %>%filter(P <1e-05), Wilson_f_h_1_GWAS %>%filter(P <1e-05), Wilson_f_h_2_GWAS %>%filter(P <1e-05), Durham_f_h_GWAS %>%filter(P <1e-05), Patel_f_h_GWAS %>%filter(P <1e-05) ) %>%distinct(SNP) %>%nrow()h_table %>%add_row(Study ="Totals",Sex ="",Temperature ="",`Mating status`="",`p < 1e-05 variants`= p_05_SNPs_h,`p < 1e-08 variants`=sum(h_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()```## Applying cross-phenotype meta-analysis### Generate the genetic correlation matrixWe calculate the genetic correlations between traits using both the line mean and SNP effect size comparisons. We use the SNP correlations following Zhu et al (2015) and show these in the below plots. Note that while the two approaches produce results that have a strong positive correlation, there are small differences that might affect downstream analyses.```{r}# use the BETA coefficients to build the SNP correlation matrixSNP_data_e0 <-bind_rows( Arya_f_l_GWAS %>%mutate(Study ="Arya_2010", Sex ="Female", Temperature =25), Huang_f_18_l_GWAS %>%mutate(Study="Huang_2020", Sex="Female", Temperature=18), Huang_f_25_l_GWAS %>%mutate(Study="Huang_2020", Sex="Female", Temperature=25), Huang_f_28_l_GWAS %>%mutate(Study="Huang_2020", Sex="Female", Temperature=28), Wilson_f_l_1_GWAS %>%mutate(Study="Wilson_2020_1", Sex="Female", Temperature=25), Wilson_f_l_2_GWAS %>%mutate(Study="Wilson_2020_2", Sex="Female", Temperature=25), Durham_f_l_GWAS %>%mutate(Study="Durham_2014", Sex="Female", Temperature=25), Patel_f_l_GWAS %>%mutate(Study="Patel_2021", Sex="Female", Temperature=23), Arya_m_l_GWAS %>%mutate(Study="Arya_2010", Sex="Male", Temperature=25), Huang_m_18_l_GWAS %>%mutate(Study="Huang_2020", Sex="Male", Temperature=18), Huang_m_25_l_GWAS %>%mutate(Study="Huang_2020", Sex="Male", Temperature =25), Huang_m_28_l_GWAS %>%mutate(Study ="Huang_2020", Sex ="Male", Temperature =28)) %>% dplyr::select(SNP, BETA, Study, Sex, Temperature) %>%pivot_wider(values_from = BETA, names_from =c(Study, Sex, Temperature)) %>% dplyr::select(-SNP) %>%rename(Arya_f_25 = Arya_2010_Female_25,Huang_f_18 = Huang_2020_Female_18,Huang_f_25 = Huang_2020_Female_25,Huang_f_28 = Huang_2020_Female_28,Wilson_f_25_1 = Wilson_2020_1_Female_25,Wilson_f_25_2 = Wilson_2020_2_Female_25,Durham_f_25 = Durham_2014_Female_25,Patel_f_23 = Patel_2021_Female_23,Arya_m_25 = Arya_2010_Male_25,Huang_m_18 = Huang_2020_Male_18,Huang_m_25 = Huang_2020_Male_25,Huang_m_28 = Huang_2020_Male_28)SNP_data_h <-bind_rows( Arya_f_h_GWAS %>%mutate(Study ="Arya_2010", Sex ="Female", Temperature =25), Huang_f_18_h_GWAS %>%mutate(Study="Huang_2020", Sex="Female", Temperature=18), Huang_f_25_h_GWAS %>%mutate(Study="Huang_2020", Sex="Female", Temperature=25), Huang_f_28_h_GWAS %>%mutate(Study="Huang_2020", Sex="Female", Temperature=28), Wilson_f_h_1_GWAS %>%mutate(Study="Wilson_2020_1", Sex="Female", Temperature=25), Wilson_f_h_2_GWAS %>%mutate(Study="Wilson_2020_2", Sex="Female", Temperature=25), Durham_f_h_GWAS %>%mutate(Study="Durham_2014", Sex="Female", Temperature=25), Patel_f_h_GWAS %>%mutate(Study="Patel_2021", Sex="Female", Temperature=23), Arya_m_h_GWAS %>%mutate(Study="Arya_2010", Sex="Male", Temperature=25), Huang_m_18_h_GWAS %>%mutate(Study="Huang_2020", Sex="Male", Temperature=18), Huang_m_25_h_GWAS %>%mutate(Study="Huang_2020", Sex="Male", Temperature =25), Huang_m_28_h_GWAS %>%mutate(Study ="Huang_2020", Sex ="Male", Temperature =28)) %>% dplyr::select(SNP, BETA, Study, Sex, Temperature) %>%pivot_wider(values_from = BETA, names_from =c(Study, Sex, Temperature)) %>% dplyr::select(-SNP) %>%rename(Arya_f_25 = Arya_2010_Female_25,Huang_f_18 = Huang_2020_Female_18,Huang_f_25 = Huang_2020_Female_25,Huang_f_28 = Huang_2020_Female_28,Wilson_f_25_1 = Wilson_2020_1_Female_25,Wilson_f_25_2 = Wilson_2020_2_Female_25,Durham_f_25 = Durham_2014_Female_25,Patel_f_23 = Patel_2021_Female_23,Arya_m_25 = Arya_2010_Male_25,Huang_m_18 = Huang_2020_Male_18,Huang_m_25 = Huang_2020_Male_25,Huang_m_28 = Huang_2020_Male_28)SNP_e0_corr_matrix <-cor(SNP_data_e0, use ="pairwise.complete.obs", method ="spearman")SNP_h_corr_matrix <-cor(SNP_data_h, use ="pairwise.complete.obs", method ="spearman")line_data <-bind_rows(Arya_2010_f, Huang_2020_f_18, Huang_2020_f_25, Huang_2020_f_28, Wilson_2020_f_1, Wilson_2020_f_2, Durham_2014_f_adjusted, Patel_2021_f, Arya_2010_m, Huang_2020_m_18, Huang_2020_m_25, Huang_2020_m_28) %>% dplyr::select(line, Treatment, Sex, Temperature, e0, h) %>%pivot_wider(values_from =c(e0, h), names_from =c(Treatment, Sex, Temperature)) line_data_e0 <- line_data %>% dplyr::select(contains("e0")) %>%rename(Arya_f_25 = e0_Arya_2010_1_Female_25,Huang_f_18 = e0_Huang_2020_1_Female_18,Huang_f_25 = e0_Huang_2020_1_Female_25,Huang_f_28 = e0_Huang_2020_1_Female_28,Wilson_f_25_1 = e0_Wilson_2020_1_Female_25,Wilson_f_25_2 = e0_Wilson_2020_2_Female_25,Durham_f_25 = e0_Durham_2014_1_Female_25,Patel_f_23 = e0_Patel_2021_1_Female_23,Arya_m_25 = e0_Arya_2010_1_Male_25,Huang_m_18 = e0_Huang_2020_1_Male_18,Huang_m_25 = e0_Huang_2020_1_Male_25,Huang_m_28 = e0_Huang_2020_1_Male_28)line_data_h <- line_data %>% dplyr::select(!contains("e0"), -line) %>%rename(Arya_f_25 = h_Arya_2010_1_Female_25,Huang_f_18 = h_Huang_2020_1_Female_18,Huang_f_25 = h_Huang_2020_1_Female_25,Huang_f_28 = h_Huang_2020_1_Female_28,Wilson_f_25_1 = h_Wilson_2020_1_Female_25,Wilson_f_25_2 = h_Wilson_2020_2_Female_25,Durham_f_25 = h_Durham_2014_1_Female_25,Patel_f_23 = h_Patel_2021_1_Female_23,Arya_m_25 = h_Arya_2010_1_Male_25,Huang_m_18 = h_Huang_2020_1_Male_18,Huang_m_25 = h_Huang_2020_1_Male_25,Huang_m_28 = h_Huang_2020_1_Male_28)line_e0_corr_matrix <-cor(line_data_e0, use ="pairwise.complete.obs", method ="spearman")line_h_corr_matrix <-cor(line_data_h, use ="pairwise.complete.obs", method ="spearman")```Let's visualise the genetic correlation between lifespan measures. First for life expectancy:```{r}breaksList <-seq(-1, 1, by =0.02)pheatmap(SNP_e0_corr_matrix, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =3, cutree_cols =3, angle_col =45, border_color ="white")```Now for lifespan equality```{r}pheatmap(SNP_h_corr_matrix, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =3, cutree_cols =3, angle_col =45, border_color ="white")```### Calculate meta-analytic test statisticsThe purpose of this meta-analysis is to search for SNPs that have some effect on ageing, expressed in many different contexts (sexes, temperatures and mating status'). To conduct CPASSOC for a given SNP, we need a summary statistic from each GWAS. A different number of lines were included in each GWAS, which can cause small differences in the number of SNPs assessed. We therefore prune the list of SNPs to those included in all univariate analyses. After pruning, 1,603,573 SNPs remain.The Bonferroni adjusted significance threshold for this number of tests is $p_{adj} = \frac{0.05}{1603573} = 3.12*10^{-8}$; here and for all future analysis, we use _p_ $< 10^{-8}$ as our significance threshold, as this is slightly more conservative and easier to quickly interpret.#### Life expectancy```{r}Arya_f_l_T <- Arya_f_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_f = T)Huang_f_18_l_T <- Huang_f_18_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_18 = T)Huang_f_25_l_T <- Huang_f_25_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_25 = T)Huang_f_28_l_T <- Huang_f_28_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_28 = T)Wilson_f_l_1_T <- Wilson_f_l_1_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_25_1 = T)Wilson_f_l_2_T <- Wilson_f_l_2_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_25_2 = T)Durham_f_l_T <- Durham_f_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Durham_f_25 = T)Patel_f_l_T <- Patel_f_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Patel_f_23 = T)Arya_m_l_T <- Arya_m_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_m = T)Huang_m_18_l_T <- Huang_m_18_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_18 = T)Huang_m_25_l_T <- Huang_m_25_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_25 = T)Huang_m_28_l_T <- Huang_m_28_l_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_28 = T)all_e0_effect_sizes <- Arya_f_l_T %>%inner_join(Huang_f_18_l_T, by ="SNP") %>%inner_join(Huang_f_25_l_T, by ="SNP") %>%inner_join(Huang_f_28_l_T, by ="SNP") %>%inner_join(Wilson_f_l_1_T, by ="SNP") %>%inner_join(Wilson_f_l_2_T, by ="SNP") %>%inner_join(Durham_f_l_T, by ="SNP") %>%inner_join(Patel_f_l_T, by ="SNP") %>%inner_join(Arya_m_l_T, by ="SNP") %>%inner_join(Huang_m_18_l_T, by ="SNP") %>%inner_join(Huang_m_25_l_T, by ="SNP") %>%inner_join(Huang_m_28_l_T, by ="SNP")all_e0_effect_sizes_values <- all_e0_effect_sizes %>% dplyr::select(2:13)Sample_size_all <-c(165, 183, 186, 177, 161, 161, 176, 193, 165, 183, 186, 177) if(!file.exists("data/Derived/GWAS_results/all_e0_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(all_e0_effect_sizes_values, Sample_size_all, SNP_e0_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary (potentially in sign) for different traits e.g. if a SNP has a sex or enviornment opposite effect on lifespan)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_all, SNP_e0_corr_matrix);S_het <-SHet(all_e0_effect_sizes_values, Sample_size_all, SNP_e0_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)# bind meta statistics with the univariate effect sizesall_e0_meta_results <- all_e0_effect_sizes %>%bind_cols(p_S_hom, p_S_het) write_csv(all_e0_meta_results, "data/Derived/GWAS_results/all_e0_meta_results.csv")} else all_e0_meta_results <-read_csv("data/Derived/GWAS_results/all_e0_meta_results.csv")```#### Lifespan equality```{r}Arya_f_h_T <- Arya_f_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_f = T)Huang_f_18_h_T <- Huang_f_18_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_18 = T)Huang_f_25_h_T <- Huang_f_25_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_25 = T)Huang_f_28_h_T <- Huang_f_28_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_28 = T)Wilson_f_h_1_T <- Wilson_f_h_1_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_25_1 = T)Wilson_f_h_2_T <- Wilson_f_h_2_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_25_2 = T)Durham_f_h_T <- Durham_f_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Durham_f_25 = T)Patel_f_h_T <- Patel_f_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Patel_f_23 = T)Arya_m_h_T <- Arya_m_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_m = T)Huang_m_18_h_T <- Huang_m_18_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_18 = T)Huang_m_25_h_T <- Huang_m_25_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_25 = T)Huang_m_28_h_T <- Huang_m_28_h_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_28 = T)all_h_effect_sizes <- Arya_f_h_T %>%inner_join(Huang_f_18_h_T, by ="SNP") %>%inner_join(Huang_f_25_h_T, by ="SNP") %>%inner_join(Huang_f_28_h_T, by ="SNP") %>%inner_join(Wilson_f_h_1_T, by ="SNP") %>%inner_join(Wilson_f_h_2_T, by ="SNP") %>%inner_join(Durham_f_h_T, by ="SNP") %>%inner_join(Patel_f_h_T, by ="SNP") %>%inner_join(Arya_m_h_T, by ="SNP") %>%inner_join(Huang_m_18_h_T, by ="SNP") %>%inner_join(Huang_m_25_h_T, by ="SNP") %>%inner_join(Huang_m_28_h_T, by ="SNP") all_h_effect_sizes_values <- all_h_effect_sizes %>% dplyr::select(2:13)if(!file.exists("data/Derived/GWAS_results/all_h_meta_results.csv")) {S_hom <-SHom(all_h_effect_sizes_values, Sample_size_all, SNP_h_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary (potentially in sign) for different traits e.g. if a SNP has a sex or enviornment opposite effect on lifespan)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_all, SNP_h_corr_matrix);S_het <-SHet(all_h_effect_sizes_values, Sample_size_all, SNP_h_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)# bind meta statistics with the univariate effect sizesall_h_meta_results <- all_h_effect_sizes %>%bind_cols(p_S_hom, p_S_het)write_csv(all_h_meta_results, "data/Derived/GWAS_results/all_h_meta_results.csv")} else all_h_meta_results <-read_csv("data/Derived/GWAS_results/all_h_meta_results.csv")```### Visualise the resultsWe combine GWAS summary statistics calculated from lifespan data measured across different contexts. It's likely that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the `S_het` calculated p-values.First lets show the effect of CPASSOC on the number of variants found to be associated with life expectancy and lifespan equality.**Table SX**...th effect of CPASSOC on the number of variants detected with associations that exceed the significance threshold.```{r}tibble(Analysis =c("CPASSOC", "Univariate", "CPASSOC", "Univariate"),Trait =c("Life expectancy", "Life expectancy", "Lifespan equality", "Lifespan equality"),`p < 1e-05 variants`=c(sum(all_e0_meta_results$meta_p_het <1e-05), p_05_SNPs,sum(all_h_meta_results$meta_p_het <1e-05), p_05_SNPs_h),`p < 1e-08 variants`=c(sum(all_e0_meta_results$meta_p_het <1e-08),sum(e0_table$`p < 1e-08 variants`),sum(all_h_meta_results$meta_p_het <1e-08),sum(h_table$`p < 1e-08 variants`))) %>%kable() %>%kable_styling()```**Table SX** the variants that have significant associations with life expectancy.```{r}# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.life_expectancy_variants <- all_e0_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^18, 3)/10^18,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)life_expectancy_variants %>%my_data_table()```**Table SX** the variants that have significant associations with lifespan equality.```{r}# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.lifespan_equality_variants <- all_h_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^11, 3)/10^11,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)lifespan_equality_variants %>%my_data_table()```Now lets build some 'Manhattan plots' to show where these significant associations can be found:```{r, fig.width=11}#| column: pagecommon_SNPs <- all_e0_meta_results %>%filter(meta_p_het <1e-08) %>%inner_join(all_h_meta_results %>%filter(meta_p_het <1e-08), by ="SNP") %>%mutate(common_SNP ="YES") %>% dplyr::select(SNP, common_SNP)e0_results <- all_e0_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het) %>%left_join(common_SNPs) %>%mutate(common_SNP =if_else(is.na(common_SNP), "NO", common_SNP))h_results <- all_h_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het) %>%left_join(common_SNPs) %>%mutate(common_SNP =if_else(is.na(common_SNP), "NO", common_SNP))# plot the results using the manhattan plot custom function we defined earliere0_het_plot <-build_manhattan_plot(e0_results) +labs(title ="Life expectancy") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 22))h_het_plot <-build_manhattan_plot(h_results) +labs(title ="Lifespan equality") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 22))e0_het_plot + h_het_plot```Plot the univariate effect sizes for each of the SNPs associated with life expectancy / lifespan equality at the genome-wide significance threshold (p < $0.05^{-8}$) after CPASSOC.**Life expectancy**```{r, fig.height=12}SNP_heatmap_e0 <- all_e0_meta_results %>%filter(meta_p_het <1e-08) %>%arrange(meta_p_het) %>% dplyr::select(1:13) row_name <- SNP_heatmap_e0$SNPSNP_heatmap_e0 <- SNP_heatmap_e0 %>% dplyr::select(-SNP) %>%as.matrix()rownames(SNP_heatmap_e0) <- row_namebreaksList <-seq(-5, 5, by =0.1)annotation_SNPs <- all_e0_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP) %>%mutate(Chromosome =case_when(str_detect(SNP, "2L") ~"2L",str_detect(SNP, "2R") ~"2R",str_detect(SNP, "3L") ~"3L",str_detect(SNP, "3R") ~"3R",str_detect(SNP, "X") ~"X"))annotation_studies <-tibble(Study =c("Arya_f","Huang_f_18","Huang_f_25","Huang_f_28","Wilson_f_25_1","Wilson_f_25_2","Durham_f_25","Patel_f_23","Arya_m","Huang_m_18","Huang_m_25","Huang_m_28"),Temperature =c("25","18","25","28","25","25","25","23","25","18","25","28")) %>%mutate(Sex =case_when(str_detect(Study, "_f") ~"Female",.default ="Male"),Mating =case_when(str_detect(Study, "Arya") ~"NO",str_detect(Study, "Huang") ~"Throughout life",str_detect(Study, "Wilson") ~"Early life",str_detect(Study, "Durham") ~"Throughout life",str_detect(Study, "Patel") ~"Early life"),Author =str_extract(Study, ".*(?=\\_)"),Author =str_remove(Author, "_f"),Author =str_remove(Author, "_m"))# create a study annotation column, need this to be a data.frame rather than a tibble for some reason Study_details <- annotation_studies %>%as.data.frame() %>% dplyr::select(Study, Temperature, Mating)my_categories <-data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],Mating = Study_details[, 3])my_colors <-list(Temperature =c("18"="#7bbcd5", # sailboat colours from pnw"23"="#d0e2af","25"="#f5db99","28"="#e89c81"),Mating =c("NO"="#f8e3d1", # Shuksan from pnw"Early life"="#d7b1c5","Throughout life"="#ac8eab"),Chromosome =c("2L"="#d8aedd", # lake colours from pnw"2R"="#cb74ad","3L"="#11c2b5","3R"="#72e1e1","X"="#fbcc74"))# create a SNP annotation columnSNP_details <- annotation_SNPs %>%as.data.frame()my_SNP_categories <-data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])my_col_names <-c("Arya et al females", "Huang et al females", "Huang et al females","Huang et al females", "Wilson et al females 1", "Wilson et al females 2", "Durham et al females","Patel et al females", "Arya et al males", "Huang et al males", "Huang et al males","Huang et al males")pheatmap(SNP_heatmap_e0, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =6, cutree_cols =5, angle_col =45, border_color ="white",annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,fontsize =8, labels_col = my_col_names)```**Lifespan equality**```{r}SNP_heatmap_h <- all_h_meta_results %>%filter(meta_p_het <1e-08) %>%arrange(meta_p_het) %>% dplyr::select(1:13) row_name <- SNP_heatmap_h$SNPSNP_heatmap_h <- SNP_heatmap_h %>% dplyr::select(-SNP) %>%as.matrix()rownames(SNP_heatmap_h) = row_namebreaksList <-seq(-5, 5, by =0.1)annotation_SNPs_h <- all_h_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP) %>%mutate(Chromosome =case_when(str_detect(SNP, "2L") ~"2L",str_detect(SNP, "2R") ~"2R",str_detect(SNP, "3L") ~"3L",str_detect(SNP, "3R") ~"3R",str_detect(SNP, "X") ~"X"))# create a SNP annotation columnSNP_details_h <- annotation_SNPs_h %>%as.data.frame()my_SNP_categories_h <-data.frame(row.names = SNP_details_h[, 1], Chromosome = SNP_details_h[, 2])pheatmap(SNP_heatmap_h, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =3, cutree_cols =4, angle_col =45, border_color ="white",annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories_h,fontsize =8, labels_col = my_col_names)```# Analysis of sexually antagonistic effects on ageing## The intersex genetic correlationLets take a quick look at the intersex-genetic correlations for our metrics in each relevant measurement condition.```{r}Arya_two_sexes_SNPS <-left_join( Arya_f_l_GWAS %>% dplyr::select(1:2) %>%rename(Female_e0_effect = BETA), Arya_m_l_GWAS %>% dplyr::select(1:2) %>%rename(Male_e0_effect = BETA) ) %>%left_join(Arya_f_h_GWAS %>% dplyr::select(1:2) %>%rename(Female_h_effect = BETA)) %>%left_join(Arya_m_h_GWAS %>% dplyr::select(1:2) %>%rename(Male_h_effect = BETA))Huang_18_two_sexes_SNPS <-left_join( Huang_f_18_l_GWAS %>% dplyr::select(1:2) %>%rename(Female_e0_effect = BETA), Huang_m_18_l_GWAS %>% dplyr::select(1:2) %>%rename(Male_e0_effect = BETA) ) %>%left_join(Huang_f_18_h_GWAS %>% dplyr::select(1:2) %>%rename(Female_h_effect = BETA)) %>%left_join(Huang_m_18_h_GWAS %>% dplyr::select(1:2) %>%rename(Male_h_effect = BETA))Huang_25_two_sexes_SNPS <-left_join( Huang_f_25_l_GWAS %>% dplyr::select(1:2) %>%rename(Female_e0_effect = BETA), Huang_m_25_l_GWAS %>% dplyr::select(1:2) %>%rename(Male_e0_effect = BETA) ) %>%left_join(Huang_f_25_h_GWAS %>% dplyr::select(1:2) %>%rename(Female_h_effect = BETA)) %>%left_join(Huang_m_25_h_GWAS %>% dplyr::select(1:2) %>%rename(Male_h_effect = BETA)) Huang_28_two_sexes_SNPS <-left_join( Huang_f_28_l_GWAS %>% dplyr::select(1:2) %>%rename(Female_e0_effect = BETA), Huang_m_28_l_GWAS %>% dplyr::select(1:2) %>%rename(Male_e0_effect = BETA) ) %>%left_join(Huang_f_28_h_GWAS %>% dplyr::select(1:2) %>%rename(Female_h_effect = BETA)) %>%left_join(Huang_m_28_h_GWAS %>% dplyr::select(1:2) %>%rename(Male_h_effect = BETA))```Plot the within-treatment intersex-genetic correlations```{r, fig.height=10}pal <-pnw_palette("Shuksan2", 100)Arya_e0_sex_plot <- Arya_two_sexes_SNPS %>%ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female life expectancy", y ="Effect on male\nlife expectancy",title ="Arya et al 2010: 25C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Arya_h_sex_plot <- Arya_two_sexes_SNPS %>%ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female lifespan equality", y ="Effect on male\nlifespan equality",title ="Arya et al 2010: 25C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_18_e0_sex_plot <- Huang_18_two_sexes_SNPS %>%ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female life expectancy", y ="Effect on male\nlife expectancy",title ="Huang et al, 2020: 18C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_18_h_sex_plot <- Huang_18_two_sexes_SNPS %>%ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female lifespan equality", y ="Effect on male\nlifespan equality",title ="Huang et al, 2020: 18C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_25_e0_sex_plot <- Huang_25_two_sexes_SNPS %>%ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female life expectancy", y ="Effect on male\nlife expectancy",title ="Huang et al, 2020: 25C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_25_h_sex_plot <- Huang_25_two_sexes_SNPS %>%ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female lifespan equality", y ="Effect on male\nlifespan equality",title ="Huang et al, 2020: 25C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_28_e0_sex_plot <- Huang_28_two_sexes_SNPS %>%ggplot(aes(x = Female_e0_effect, y = Male_e0_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female life expectancy", y ="Effect on male\nlife expectancy",title ="Huang et al, 2020: 28C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))Huang_28_h_sex_plot <- Huang_28_two_sexes_SNPS %>%ggplot(aes(x = Female_h_effect, y = Male_h_effect )) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +stat_binhex(bins =50) +scale_fill_gradientn(colours = pal) +#geom_point() +#stat_smooth(method = "lm", formula = y ~ x + I(x^2), linewidth = 0.75, colour = "black") +coord_cartesian(xlim =c(-0.8, 0.8), ylim =c(-0.8, 0.8)) +labs(x ="Effect on female lifespan equality", y ="Effect on male\nlifespan equality",title ="Huang et al, 2020: 28C") +theme_bw() +theme(legend.position ="none",text =element_text(size =10),plot.title =element_text(hjust =0.5, size =11))(Arya_e0_sex_plot + Arya_h_sex_plot) / (Huang_18_e0_sex_plot + Huang_18_h_sex_plot) / (Huang_25_e0_sex_plot + Huang_25_h_sex_plot) / (Huang_28_e0_sex_plot + Huang_28_h_sex_plot) ```## Axis of antagonism and concordanceWhile there is a strong inter-sex genetic correlation for both life expectancy and lifespan equality, both correlations are significantly lower than one. Thus, it is likely that there are alleles that have sex-limited or sex-opposite effects on lifespan/lifespan equality. In an attempt to identify these genetic variants, we select those studies that measured lifespan in both sexes and calculate a sexual antagonism index for each line, following previous studies focused on similar questions (e.g. [Berger et al, 2014](), [Grieshop and Arnqvist, 2018](), [Ruzicka et al, 2020]()). To create the index, we rotate the coordinate system of the female and male fitness plane by 45 degrees, by multiplying the 204 x 2 fitness matrix by the rotation matrix$$\mathbf{R} = \begin{bmatrix} \frac{-1}{\sqrt{2}} & \frac{-1}{\sqrt{2}} \\[0.3em] \frac{-1}{\sqrt{2}} & \frac{1}{\sqrt{2}} \\[0.3em] \end{bmatrix}$$```{r}rotation_matrix <-matrix(c(-1/sqrt(2), -1/sqrt(2), -1/sqrt(2), 1/sqrt(2)),nrow =2)rotate_axis_function <-function(data_1, data_2, Females, Males, metric){left_join( data_1 %>%rename(Females = metric), data_2 %>%rename(Males = metric)) %>% dplyr::select(-line) %>%as.matrix() %*% rotation_matrix %>%as_tibble() %>%rename(SC_axis = V1,SA_axis = V2) %>%bind_cols(left_join( data_1 %>%rename(Females = metric), data_2%>%rename(Males = metric))) %>% dplyr::select(line, Females, Males, everything())}Arya_l_SA <-rotate_axis_function(Arya_f_l, Arya_m_l, Females ="Female_life_expectancy", Males ="Male_life_expectancy",metric ="e0_scaled") Huang_18_l_SA <-rotate_axis_function(Huang_f_18_l, Huang_m_18_l, Females ="Female_life_expectancy", Males ="Male_life_expectancy",metric ="e0_scaled") Huang_25_l_SA <-rotate_axis_function(Huang_f_25_l, Huang_m_25_l, Females ="Female_life_expectancy", Males ="Male_life_expectancy",metric ="e0_scaled") Huang_28_l_SA <-rotate_axis_function(Huang_f_28_l, Huang_m_28_l, Females ="Female_life_expectancy", Males ="Male_life_expectancy",metric ="e0_scaled") # now lifespan equalityArya_h_SA <-rotate_axis_function(Arya_f_h, Arya_m_h, Females ="Female_lifespan_equality", Males ="Male_lifespan_equality",metric ="h") Huang_18_h_SA <-rotate_axis_function(Huang_f_18_h, Huang_m_18_h, Females ="Female_lifespan_equality", Males ="Male_lifespan_equality",metric ="h") Huang_25_h_SA <-rotate_axis_function(Huang_f_25_h, Huang_m_25_h, Females ="Female_lifespan_equality", Males ="Male_lifespan_equality",metric ="h") Huang_28_h_SA <-rotate_axis_function(Huang_f_28_h, Huang_m_28_h, Females ="Female_lifespan_equality", Males ="Male_lifespan_equality",metric ="h") ```To visualise this transformation lets plot the standardised life expectancy means presented in Arya _et al_ 2010```{r}Arya_l_SA %>%ggplot(aes(x = Females, y = Males)) +geom_vline(xintercept =0, linetype =2, linewidth =1) +geom_hline(yintercept =0, linetype =2, linewidth =1) +geom_abline(intercept =0, slope =-1, linewidth =1, linetype =2, colour ="salmon") +geom_abline(intercept =0, slope =1, linewidth =1, linetype =2, colour ="salmon") +geom_hline(yintercept =0, linetype =2, linewidth =1) +geom_point(aes(fill = SA_axis), shape =21, size =4) +scale_fill_moma_c("Avedon", direction =-1, limits =c(-2.5, 2.5)) +coord_cartesian(xlim =c(-4, 4), ylim =c(-4, 4)) +labs(fill ="SA index",x ="Life expectancy (female)",y ="Life expectancy (male)",title ="Arya et al 2010 data") +theme_bw() +theme(plot.title =element_text(hjust =0.5))```**Fig SX**. Residual line means for female and male life expectancy. Points indicate a single genetic line. The sexual antagonism index ranges from male-beneficial and female-detrimental (orange points) to female-beneficial and male-detrimental (green points). The red dotted lines show the rotation that has been performed to create the antagonism and concordance axis. ### Run univariate GWAS```{r}SA_e0_Arya <-prep_for_SA_GWAS(Arya_l_SA)SA_e0_Huang_18 <-prep_for_SA_GWAS(Huang_18_l_SA)SA_e0_Huang_25 <-prep_for_SA_GWAS(Huang_25_l_SA)SA_e0_Huang_28 <-prep_for_SA_GWAS(Huang_28_l_SA)SA_h_Arya <-prep_for_SA_GWAS(Arya_h_SA)SA_h_Huang_18 <-prep_for_SA_GWAS(Huang_18_h_SA)SA_h_Huang_25 <-prep_for_SA_GWAS(Huang_25_h_SA)SA_h_Huang_28 <-prep_for_SA_GWAS(Huang_28_h_SA)SC_e0_Arya <-prep_for_SC_GWAS(Arya_l_SA)SC_e0_Huang_18 <-prep_for_SC_GWAS(Huang_18_l_SA)SC_e0_Huang_25 <-prep_for_SC_GWAS(Huang_25_l_SA)SC_e0_Huang_28 <-prep_for_SC_GWAS(Huang_28_l_SA)SC_h_Arya <-prep_for_SC_GWAS(Arya_h_SA)SC_h_Huang_18 <-prep_for_SC_GWAS(Huang_18_h_SA)SC_h_Huang_25 <-prep_for_SC_GWAS(Huang_25_h_SA)SC_h_Huang_28 <-prep_for_SC_GWAS(Huang_28_h_SA)if(!file.exists("data/Derived/GWAS_results/SA_e0_Arya.tsv.gz")) {run_GWAS(SA_e0_Arya)run_GWAS(SA_e0_Huang_18)run_GWAS(SA_e0_Huang_25)run_GWAS(SA_e0_Huang_28)run_GWAS(SA_h_Arya)run_GWAS(SA_h_Huang_18)run_GWAS(SA_h_Huang_25)run_GWAS(SA_h_Huang_28)run_GWAS(SC_e0_Arya)run_GWAS(SC_e0_Huang_18)run_GWAS(SC_e0_Huang_25)run_GWAS(SC_e0_Huang_28)run_GWAS(SC_h_Arya)run_GWAS(SC_h_Huang_18)run_GWAS(SC_h_Huang_25)run_GWAS(SC_h_Huang_28)}SA_e0_Arya_GWAS <-read_tsv("data/Derived/GWAS_results/SA_e0_Arya.tsv.gz") SA_e0_Huang_18_GWAS <-read_tsv("data/Derived/GWAS_results/SA_e0_Huang_18.tsv.gz") SA_e0_Huang_25_GWAS <-read_tsv("data/Derived/GWAS_results/SA_e0_Huang_25.tsv.gz") SA_e0_Huang_28_GWAS <-read_tsv("data/Derived/GWAS_results/SA_e0_Huang_28.tsv.gz") SA_h_Arya_GWAS <-read_tsv("data/Derived/GWAS_results/SA_h_Arya.tsv.gz") SA_h_Huang_18_GWAS <-read_tsv("data/Derived/GWAS_results/SA_h_Huang_18.tsv.gz")SA_h_Huang_25_GWAS <-read_tsv("data/Derived/GWAS_results/SA_h_Huang_25.tsv.gz")SA_h_Huang_28_GWAS <-read_tsv("data/Derived/GWAS_results/SA_h_Huang_28.tsv.gz")SC_e0_Arya_GWAS <-read_tsv("data/Derived/GWAS_results/SC_e0_Arya.tsv.gz") SC_e0_Huang_18_GWAS <-read_tsv("data/Derived/GWAS_results/SC_e0_Huang_18.tsv.gz") SC_e0_Huang_25_GWAS <-read_tsv("data/Derived/GWAS_results/SC_e0_Huang_25.tsv.gz") SC_e0_Huang_28_GWAS <-read_tsv("data/Derived/GWAS_results/SC_e0_Huang_28.tsv.gz") SC_h_Arya_GWAS <-read_tsv("data/Derived/GWAS_results/SC_h_Arya.tsv.gz") SC_h_Huang_18_GWAS <-read_tsv("data/Derived/GWAS_results/SC_h_Huang_18.tsv.gz")SC_h_Huang_25_GWAS <-read_tsv("data/Derived/GWAS_results/SC_h_Huang_25.tsv.gz")SC_h_Huang_28_GWAS <-read_tsv("data/Derived/GWAS_results/SC_h_Huang_28.tsv.gz")```How many variants do we find that have notable associations with the SA axis for life expectancy:**Table SX**. Genotype to phenotype associations detected by univariate GWAS, for the life expectancy sexual antagonism axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.```{r}# filter down to sig associationsSA_e0_table <-bind_rows(SA_e0_Arya_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_e0_Arya_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_e0_Huang_18_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_e0_Huang_18_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_e0_Huang_25_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_e0_Huang_25_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_e0_Huang_28_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_e0_Huang_28_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?SA_p_05_SNPs_e0 <-bind_rows( SA_e0_Arya_GWAS %>%filter(P <1e-05), SA_e0_Huang_18_GWAS %>%filter(P <1e-05), SA_e0_Huang_25_GWAS %>%filter(P <1e-05), SA_e0_Huang_28_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()SA_e0_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= SA_p_05_SNPs_e0,`p < 1e-08 variants`=sum(SA_e0_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()```**Table SX**. Genotype to phenotype associations detected by univariate GWAS, for the life expectancy sexual concordance axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.```{r}# filter down to sig associationsSC_e0_table <-bind_rows(SC_e0_Arya_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_e0_Arya_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_e0_Huang_18_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_e0_Huang_18_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_e0_Huang_25_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_e0_Huang_25_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_e0_Huang_28_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_e0_Huang_28_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?SC_p_05_SNPs_e0 <-bind_rows( SC_e0_Arya_GWAS %>%filter(P <1e-05), SC_e0_Huang_18_GWAS %>%filter(P <1e-05), SC_e0_Huang_25_GWAS %>%filter(P <1e-05), SC_e0_Huang_28_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()SC_e0_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= SC_p_05_SNPs_e0,`p < 1e-08 variants`=sum(SC_e0_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()```**Table SX**. Genotype to phenotype associations detected by univariate GWAS, for the lifespan equality sexual antagonism axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.```{r}# filter down to sig associationsSA_h_table <-bind_rows(SA_h_Arya_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_h_Arya_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_h_Huang_18_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_h_Huang_18_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_h_Huang_25_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_h_Huang_25_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SA_h_Huang_28_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SA_h_Huang_28_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?SA_p_05_SNPs_h <-bind_rows( SA_h_Arya_GWAS %>%filter(P <1e-05), SA_h_Huang_18_GWAS %>%filter(P <1e-05), SA_h_Huang_25_GWAS %>%filter(P <1e-05), SA_h_Huang_28_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()SA_h_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= SA_p_05_SNPs_h,`p < 1e-08 variants`=sum(SA_h_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()```**Table SX**. Genotype to phenotype associations detected by univariate GWAS, for the lifespan equality sexual concordance axis. The total row shows the number of unique candidate variants identified across all studies. *Wilson et al phenotyped lifespan under two separate dietary conditions, which we include separately in our analysis.```{r}# filter down to sig associationsSC_h_table <-bind_rows(SC_h_Arya_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_h_Arya_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_h_Huang_18_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_h_Huang_18_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_h_Huang_25_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_h_Huang_25_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),SC_h_Huang_28_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(SC_h_Huang_28_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?SC_p_05_SNPs_h <-bind_rows( SC_h_Arya_GWAS %>%filter(P <1e-05), SC_h_Huang_18_GWAS %>%filter(P <1e-05), SC_h_Huang_25_GWAS %>%filter(P <1e-05), SC_h_Huang_28_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()SC_h_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= SC_p_05_SNPs_h,`p < 1e-08 variants`=sum(SC_h_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()```### Run CPASSOC#### Generate the genetic correlation matrixWe calculate the genetic correlations between traits using both the line mean and SNP effect size comparisons. We use the SNP correlations following Zhu et al (2015) and show these in the below plots. Note that while the two approaches produce results that have a strong positive correlation, there are small differences that will affect downstream analyses.Build the SA correlations matrices```{r}# use the BETA coefficients to build the SNP correlation matrixSNP_SA_data_e0 <-bind_rows( SA_e0_Arya_GWAS %>%mutate(Study ="Arya", Temperature =25), SA_e0_Huang_18_GWAS %>%mutate(Study="Huang", Temperature=18), SA_e0_Huang_25_GWAS %>%mutate(Study="Huang", Temperature=25), SA_e0_Huang_28_GWAS %>%mutate(Study="Huang", Temperature=28)) %>% dplyr::select(SNP, BETA, Study, Temperature) %>%pivot_wider(values_from = BETA, names_from =c(Study, Temperature)) %>% dplyr::select(-SNP) SNP_SA_data_h <-bind_rows( SA_h_Arya_GWAS %>%mutate(Study ="Arya", Temperature =25), SA_h_Huang_18_GWAS %>%mutate(Study="Huang", Temperature=18), SA_h_Huang_25_GWAS %>%mutate(Study="Huang", Temperature=25), SA_h_Huang_28_GWAS %>%mutate(Study="Huang", Temperature=28)) %>% dplyr::select(SNP, BETA, Study, Temperature) %>%pivot_wider(values_from = BETA, names_from =c(Study, Temperature)) %>% dplyr::select(-SNP) SNP_SA_e0_corr_matrix <-cor(SNP_SA_data_e0, use ="pairwise.complete.obs", method ="spearman")SNP_SA_h_corr_matrix <-cor(SNP_SA_data_h, use ="pairwise.complete.obs", method ="spearman")SA_e0_line_data <-bind_rows(Arya_l_SA %>%mutate(Study ="Arya_25"), Huang_18_l_SA %>%mutate(Study ="Huang_18"), Huang_25_l_SA %>%mutate(Study ="Huang_25"), Huang_28_l_SA %>%mutate(Study ="Huang_28")) %>% dplyr::select(line, Study, SA_axis) %>%pivot_wider(values_from = SA_axis, names_from = Study) %>% dplyr::select(-line)SA_h_line_data <-bind_rows(Arya_h_SA %>%mutate(Study ="Arya_25"), Huang_18_h_SA %>%mutate(Study ="Huang_18"), Huang_25_h_SA %>%mutate(Study ="Huang_25"), Huang_28_h_SA %>%mutate(Study ="Huang_28")) %>% dplyr::select(line, Study, SA_axis) %>%pivot_wider(values_from = SA_axis, names_from = Study) %>% dplyr::select(-line)SA_line_e0_corr_matrix <-cor(SA_e0_line_data, use ="pairwise.complete.obs", method ="spearman")SA_line_h_corr_matrix <-cor(SA_h_line_data, use ="pairwise.complete.obs", method ="spearman")```Build the SC correlations matrices```{r}# use the BETA coefficients to build the SNP correlation matrixSNP_SC_data_e0 <-bind_rows( SC_e0_Arya_GWAS %>%mutate(Study ="Arya", Temperature =25), SC_e0_Huang_18_GWAS %>%mutate(Study="Huang", Temperature=18), SC_e0_Huang_25_GWAS %>%mutate(Study="Huang", Temperature=25), SC_e0_Huang_28_GWAS %>%mutate(Study="Huang", Temperature=28)) %>% dplyr::select(SNP, BETA, Study, Temperature) %>%pivot_wider(values_from = BETA, names_from =c(Study, Temperature)) %>% dplyr::select(-SNP) SNP_SC_data_h <-bind_rows( SC_h_Arya_GWAS %>%mutate(Study ="Arya", Temperature =25), SC_h_Huang_18_GWAS %>%mutate(Study="Huang", Temperature=18), SC_h_Huang_25_GWAS %>%mutate(Study="Huang", Temperature=25), SC_h_Huang_28_GWAS %>%mutate(Study="Huang", Temperature=28)) %>% dplyr::select(SNP, BETA, Study, Temperature) %>%pivot_wider(values_from = BETA, names_from =c(Study, Temperature)) %>% dplyr::select(-SNP) SNP_SC_e0_corr_matrix <-cor(SNP_SC_data_e0, use ="pairwise.complete.obs", method ="spearman")SNP_SC_h_corr_matrix <-cor(SNP_SC_data_h, use ="pairwise.complete.obs", method ="spearman")SC_e0_line_data <-bind_rows(Arya_l_SA %>%mutate(Study ="Arya_25"), Huang_18_l_SA %>%mutate(Study ="Huang_18"), Huang_25_l_SA %>%mutate(Study ="Huang_25"), Huang_28_l_SA %>%mutate(Study ="Huang_28")) %>% dplyr::select(line, Study, SC_axis) %>%pivot_wider(values_from = SC_axis, names_from = Study) %>% dplyr::select(-line)SC_h_line_data <-bind_rows(Arya_h_SA %>%mutate(Study ="Arya_25"), Huang_18_h_SA %>%mutate(Study ="Huang_18"), Huang_25_h_SA %>%mutate(Study ="Huang_25"), Huang_28_h_SA %>%mutate(Study ="Huang_28")) %>% dplyr::select(line, Study, SC_axis) %>%pivot_wider(values_from = SC_axis, names_from = Study) %>% dplyr::select(-line)SC_line_e0_corr_matrix <-cor(SC_e0_line_data, use ="pairwise.complete.obs", method ="spearman")SC_line_h_corr_matrix <-cor(SC_h_line_data, use ="pairwise.complete.obs", method ="spearman")```#### Calculate meta-analytic test statisticsThe purpose of these meta-analysis is to search for SNPs that have a) sex-specific and b) sexually concordant effects on ageing. To conduct CPASSOC for a given SNP, we need a summary statistic from each GWAS. A different number of lines were included in each GWAS, which can cause small differences in the number of SNPs assessed. We therefore prune the list of SNPs to those included in all univariate analyses. After pruning, 1,619,501 SNPs remain.**Sex-antagonism and concordance for life expectancy**First run CPASSOC for the SA axis```{r}SA_Arya_l_T <- SA_e0_Arya_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya = T)SA_Huang_18_l_T <- SA_e0_Huang_18_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_18 = T)SA_Huang_25_l_T <- SA_e0_Huang_25_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_25 = T)SA_Huang_28_l_T <- SA_e0_Huang_28_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_28 = T)SA_e0_effect_sizes <- SA_Arya_l_T %>%inner_join(SA_Huang_18_l_T, by ="SNP") %>%inner_join(SA_Huang_25_l_T, by ="SNP") %>%inner_join(SA_Huang_28_l_T, by ="SNP") SA_e0_effect_size_values <- SA_e0_effect_sizes %>% dplyr::select(2:5)Sample_size_SA <-c(165, 183, 186, 177) if(!file.exists("data/Derived/GWAS_results/SA_e0_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(SA_e0_effect_size_values, Sample_size_SA, SNP_SA_e0_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_SA, SNP_SA_e0_corr_matrix);S_het <-SHet(SA_e0_effect_size_values, Sample_size_SA, SNP_SA_e0_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)SA_e0_meta_results <- SA_e0_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(SA_e0_meta_results, "data/Derived/GWAS_results/SA_e0_meta_results.csv")} else SA_e0_meta_results <-read_csv("data/Derived/GWAS_results/SA_e0_meta_results.csv")```Now run it for the SC axis```{r}SC_Arya_l_T <- SC_e0_Arya_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya = T)SC_Huang_18_l_T <- SC_e0_Huang_18_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_18 = T)SC_Huang_25_l_T <- SC_e0_Huang_25_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_25 = T)SC_Huang_28_l_T <- SC_e0_Huang_28_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_28 = T)SC_e0_effect_sizes <- SC_Arya_l_T %>%inner_join(SC_Huang_18_l_T, by ="SNP") %>%inner_join(SC_Huang_25_l_T, by ="SNP") %>%inner_join(SC_Huang_28_l_T, by ="SNP") SC_e0_effect_size_values <- SC_e0_effect_sizes %>% dplyr::select(2:5)Sample_size_SC <-c(165, 183, 186, 177) if(!file.exists("data/Derived/GWAS_results/SC_e0_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(SC_e0_effect_size_values, Sample_size_SC, SNP_SC_e0_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_SC, SNP_SC_e0_corr_matrix);S_het <-SHet(SC_e0_effect_size_values, Sample_size_SC, SNP_SC_e0_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)SC_e0_meta_results <- SC_e0_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(SC_e0_meta_results, "data/Derived/GWAS_results/SC_e0_meta_results.csv")} else SC_e0_meta_results <-read_csv("data/Derived/GWAS_results/SC_e0_meta_results.csv")```**Sex-antagonism and concordance for lifespan equality**Sexual antagonism```{r}# lifespan equalitySA_Arya_h_T <- SA_h_Arya_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya = T)SA_Huang_18_h_T <- SA_h_Huang_18_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_18 = T)SA_Huang_25_h_T <- SA_h_Huang_25_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_25 = T)SA_Huang_28_h_T <- SA_h_Huang_28_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_28 = T)SA_h_effect_sizes <- SA_Arya_h_T %>%inner_join(SA_Huang_18_h_T, by ="SNP") %>%inner_join(SA_Huang_25_h_T, by ="SNP") %>%inner_join(SA_Huang_28_h_T, by ="SNP") SA_h_effect_size_values <- SA_h_effect_sizes %>% dplyr::select(2:5)if(!file.exists("data/Derived/GWAS_results/SA_h_meta_results.csv")) {S_hom <-SHom(SA_h_effect_size_values, Sample_size_SA, SNP_SA_h_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (probably not such a problem here given all traits we investigate are some version of life expectancy)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_SA, SNP_SA_h_corr_matrix);S_het <-SHet(SA_h_effect_size_values, Sample_size_SA, SNP_SA_h_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)SA_h_meta_results <- SA_h_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(SA_h_meta_results, "data/Derived/GWAS_results/SA_h_meta_results.csv")} else SA_h_meta_results <-read_csv("data/Derived/GWAS_results/SA_h_meta_results.csv")```Sexual concordance```{r}# lifespan equalitySC_Arya_h_T <- SC_h_Arya_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya = T)SC_Huang_18_h_T <- SC_h_Huang_18_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_18 = T)SC_Huang_25_h_T <- SC_h_Huang_25_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_25 = T)SC_Huang_28_h_T <- SC_h_Huang_28_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_28 = T)SC_h_effect_sizes <- SC_Arya_h_T %>%inner_join(SC_Huang_18_h_T, by ="SNP") %>%inner_join(SC_Huang_25_h_T, by ="SNP") %>%inner_join(SC_Huang_28_h_T, by ="SNP") SC_h_effect_size_values <- SC_h_effect_sizes %>% dplyr::select(2:5)if(!file.exists("data/Derived/GWAS_results/SC_h_meta_results.csv")) {S_hom <-SHom(SC_h_effect_size_values, Sample_size_SC, SNP_SC_h_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (probably not such a problem here given all traits we investigate are some version of life expectancy)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_SC, SNP_SC_h_corr_matrix);S_het <-SHet(SC_h_effect_size_values, Sample_size_SC, SNP_SC_h_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)SC_h_meta_results <- SC_h_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(SC_h_meta_results, "data/Derived/GWAS_results/SC_h_meta_results.csv")} else SC_h_meta_results <-read_csv("data/Derived/GWAS_results/SC_h_meta_results.csv")```### Visualise the resultsWe combine GWAS summary statistics calculated from lifespan data measured across different contexts. It's possible that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the `S_het` calculated p-values.First lets show the effect of CPASSOC on the number of variants found to be associated with life expectancy and lifespan equality.**Table XX**. Genotype to phenotype associations detected by univariate GWAS, for **lifespan equality**. The total row shows the number of unique candidate variants identified across all studies.```{r}tibble(Analysis =c("CPASSOC", "Univariate", "CPASSOC", "Univariate", "CPASSOC", "Univariate", "CPASSOC", "Univariate"),Trait =c("Life expectancy SA", "Life expectancy SA", "Life expectancy SC", "Life expectancy SC","Lifespan equality SA", "Lifespan equality SA", "Lifespan equality SC", "Lifespan equality SC"),`p < 1e-05 variants`=c(sum(SA_e0_meta_results$meta_p_het <1e-05), SA_p_05_SNPs_e0,sum(SC_e0_meta_results$meta_p_het <1e-05), SC_p_05_SNPs_e0,sum(SA_h_meta_results$meta_p_het <1e-05), SA_p_05_SNPs_h,sum(SC_h_meta_results$meta_p_het <1e-05), SC_p_05_SNPs_h),`p < 1e-08 variants`=c(sum(SA_e0_meta_results$meta_p_het <1e-08),sum(SA_e0_table$`p < 1e-08 variants`),sum(SC_e0_meta_results$meta_p_het <1e-08),sum(SC_e0_table$`p < 1e-08 variants`),sum(SA_h_meta_results$meta_p_het <1e-08),sum(SA_h_table$`p < 1e-08 variants`),sum(SC_h_meta_results$meta_p_het <1e-08),sum(SC_h_table$`p < 1e-08 variants`))) %>%kable() %>%kable_styling()#SA_e0_meta_results %>% filter(meta_p_hom < 1e-05) %>% # anti_join(SA_p_05_SNPs_e0) %>% arrange(meta_p_hom)#SA_p_05_SNPs_e0```**Table SX**...variants that have sex-specific associations with life expectancy.```{r}# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.SA_e0_variants <- SA_e0_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^11, 2)/10^11,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)SA_e0_variants %>%my_data_table()```**Table SX**...variants that have sexually concordant associations with life expectancy.```{r}# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.SC_e0_variants <- SC_e0_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^18, 2)/10^18,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)SC_e0_variants %>%my_data_table()```**Table SX**...variants that have sexually concordant associations with life expectancy. There are no variants with strong sex-specific associations with lifespan equality.```{r}# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.SC_h_variants <- SC_h_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^8, 4)/10^8,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)SC_h_variants %>%my_data_table()```Now lets build some 'Manhattan plots' to show where these significant associations can be found:```{r, fig.width=11, fig.height = 8}#| column: pageSA_e0_results <- SA_e0_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het) SC_e0_results <- SC_e0_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)SA_h_results <- SA_h_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)SC_h_results <- SC_h_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)# plot the results using the Manhattan plot custom function we defined earlierSA_e0_het_plot <-build_manhattan_plot(SA_e0_results) +labs(title ="SA axis for life expectancy") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 15)) +xlab(NULL)SC_e0_het_plot <-build_manhattan_plot(SC_e0_results) +labs(title ="SC axis for life expectancy") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 15))SA_h_het_plot <-build_manhattan_plot(SA_h_results) +labs(title ="SA axis for lifespan equality") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 15)) +xlab(NULL)SC_h_het_plot <-build_manhattan_plot(SC_h_results) +labs(title ="SC axis for lifespan equality") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 15))SA_e0_het_plot + SA_h_het_plot + SC_e0_het_plot + SC_h_het_plot +plot_layout(ncol =2)```**Figure SX**. xxx# Investigating the rate of ageing and the scaling of mortality risk## Creating axes of ageing rate and scalingWe've shown that perpendicular deviation from the regression of lifespan equality on life expectancy closely corresponds to the rate of ageing ($b$) parameter in a gompertz mortality model. To identify regions of the genome associated with the rate of ageing, we can calculate a rate of ageing index for each line in each treatment, by once again using rotation matrices. To create this index, we rotate the coordinate system of the life expectancy and lifespan equality plane by $\theta$ degrees, where $\theta$ is the angle between the positive x-axis and the regression of lifespan equality on life expectancy (mean centered and standarised to $\sigma$ = 1).**Finding the slopes**```{r}Arya_f_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Arya_2010_f, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Arya_f_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Arya_f_slope <-as_draws_df(Arya_f_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Arya_m_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Arya_2010_m, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Arya_m_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Arya_m_slope <-as_draws_df(Arya_m_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Huang_f_18_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Huang_2020_f_18, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Huang_f_18_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Huang_f_18_slope <-as_draws_df(Huang_f_18_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Huang_m_18_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Huang_2020_m_18, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Huang_m_18_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Huang_m_18_slope <-as_draws_df(Huang_m_18_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Huang_f_25_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Huang_2020_f_25, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Huang_f_25_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Huang_f_25_slope <-as_draws_df(Huang_f_25_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Huang_m_25_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Huang_2020_m_25, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Huang_m_25_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Huang_m_25_slope <-as_draws_df(Huang_m_25_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Huang_f_28_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Huang_2020_f_28, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Huang_f_28_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Huang_f_28_slope <-as_draws_df(Huang_f_28_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Huang_m_28_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Huang_2020_m_28, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Huang_m_28_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Huang_m_28_slope <-as_draws_df(Huang_m_28_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Wilson_f_model_1 <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Wilson_2020_f_1, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Wilson_f_slope_1",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Wilson_f_slope_1 <-as_draws_df(Wilson_f_model_1) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Wilson_f_model_2 <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Wilson_2020_f_2, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Wilson_f_slope_2",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Wilson_f_slope_2 <-as_draws_df(Wilson_f_model_2) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Durham_f_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Durham_2014_f_adjusted, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Durham_f_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Durham_f_slope <-as_draws_df(Durham_f_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)Patel_f_model <-brm(h_scaled ~1+ e0_scaled,prior =c(prior(normal(0, 0.1), class = Intercept),prior(normal(0, 1), class = b),prior(exponential(1), class = sigma)),family = gaussian,iter =6000, warmup =2000,control =list(adapt_delta =0.8, max_treedepth =10),data = Patel_2021_f, chains =4, cores =4, file ="data/Derived/Ageing_axis_slopes/Patel_f_slope",#backend = "cmdstanr", stan_model_args = list(stanc_options = list("O1")),refresh =400, silent =0, seed =1)Patel_f_slope <-as_draws_df(Patel_f_model) %>%as_tibble() %>% dplyr::select(b_e0_scaled) %>%summarise(slope =mean(b_e0_scaled)) %>%pull(slope)```With regression coefficients found, we can use the following formula to calculate the angle (in radians) between the mean regression line and the x-axis:$\theta = tan^{-1}(\beta)$where $\beta$ is the point estimate of the slope from each model posterior distribution.```{r}Arya_f_angle <-atan(Arya_f_slope)Arya_m_angle <-atan(Arya_m_slope)Huang_f_18_angle <-atan(Huang_f_18_slope)Huang_m_18_angle <-atan(Huang_m_18_slope)Huang_f_25_angle <-atan(Huang_f_25_slope)Huang_m_25_angle <-atan(Huang_m_25_slope)Huang_f_28_angle <-atan(Huang_f_28_slope)Huang_m_28_angle <-atan(Huang_m_28_slope)Wilson_f_1_angle <-atan(Wilson_f_slope_1)Wilson_f_2_angle <-atan(Wilson_f_slope_2)Durham_f_angle <-atan(Durham_f_slope)Patel_f_angle <-atan(Patel_f_slope)```We then rotate the coordinate system of the life expectancy and lifespan equality plane clockwise by $\theta$:$$x' = xcos(\theta) - ysin(\theta),$$$$y' = xsin(\theta) + ycos(\theta),$$where $x'$ and $y'$ are the vectors of ageing rate and scaling, and $x$ and $y$ are life expectancy and lifespan equality. ```{r}Arya_2010_f <- Arya_2010_f %>%mutate(ageing_axis = h_scaled*cos(Arya_f_angle) - e0_scaled*sin(Arya_f_angle),scaling_axis = h_scaled*sin(Arya_f_angle) + e0_scaled*cos(Arya_f_angle))Arya_2010_m <- Arya_2010_m %>%mutate(ageing_axis = h_scaled*cos(Arya_m_angle) - e0_scaled*sin(Arya_m_angle),scaling_axis = h_scaled*sin(Arya_m_angle) + e0_scaled*cos(Arya_m_angle))Huang_2020_f_18 <- Huang_2020_f_18 %>%mutate(ageing_axis = h_scaled*cos(Huang_f_18_angle) - e0_scaled*sin(Huang_f_18_angle),scaling_axis = h_scaled*sin(Huang_f_18_angle) + e0_scaled*cos(Huang_f_18_angle))Huang_2020_m_18 <- Huang_2020_m_18 %>%mutate(ageing_axis = h_scaled*cos(Huang_m_18_angle) - e0_scaled*sin(Huang_m_18_angle),scaling_axis = h_scaled*sin(Huang_m_18_angle) + e0_scaled*cos(Huang_m_18_angle))Huang_2020_f_25 <- Huang_2020_f_25 %>%mutate(ageing_axis = h_scaled*cos(Huang_f_25_angle) - e0_scaled*sin(Huang_f_25_angle),scaling_axis = h_scaled*sin(Huang_f_25_angle) + e0_scaled*cos(Huang_f_25_angle))Huang_2020_m_25 <- Huang_2020_m_25 %>%mutate(ageing_axis = h_scaled*cos(Huang_m_25_angle) - e0_scaled*sin(Huang_m_25_angle),scaling_axis = h_scaled*sin(Huang_m_25_angle) + e0_scaled*cos(Huang_m_25_angle))Huang_2020_f_28 <- Huang_2020_f_28 %>%mutate(ageing_axis = h_scaled*cos(Huang_f_28_angle) - e0_scaled*sin(Huang_f_28_angle),scaling_axis = h_scaled*sin(Huang_f_28_angle) + e0_scaled*cos(Huang_f_28_angle))Huang_2020_m_28 <- Huang_2020_m_28 %>%mutate(ageing_axis = h_scaled*cos(Huang_m_28_angle) - e0_scaled*sin(Huang_m_28_angle),scaling_axis = h_scaled*sin(Huang_m_28_angle) + e0_scaled*cos(Huang_m_28_angle))Wilson_2020_f_1 <- Wilson_2020_f_1 %>%mutate(ageing_axis = h_scaled*cos(Wilson_f_1_angle) - e0_scaled*sin(Wilson_f_1_angle),scaling_axis = h_scaled*sin(Wilson_f_1_angle) + e0_scaled*cos(Wilson_f_1_angle))Wilson_2020_f_2 <- Wilson_2020_f_2 %>%mutate(ageing_axis = h_scaled*cos(Wilson_f_2_angle) - e0_scaled*sin(Wilson_f_2_angle),scaling_axis = h_scaled*sin(Wilson_f_2_angle) + e0_scaled*cos(Wilson_f_2_angle))Durham_2014_f_adjusted <- Durham_2014_f_adjusted %>%mutate(ageing_axis = h_scaled*cos(Durham_f_angle) - e0_scaled*sin(Durham_f_angle),scaling_axis = h_scaled*sin(Durham_f_angle) + e0_scaled*cos(Durham_f_angle))Patel_2021_f <- Patel_2021_f %>%mutate(ageing_axis = h_scaled*cos(Patel_f_angle) - e0_scaled*sin(Patel_f_angle),scaling_axis = h_scaled*sin(Patel_f_angle) + e0_scaled*cos(Patel_f_angle))```Plot the line means, coloured by their value on the ageing rate axis.```{r, fig.height=10, fig.width=12}ageing_axis_plot <-function(data, study_title){data %>%ggplot(aes(x = e0, y = h)) +geom_point(aes(fill = ageing_axis), shape =21, size =4) +scale_fill_moma_c("Avedon", direction =-1, limits =c(-4, 4)) +geom_smooth(method ="lm") +#coord_cartesian(xlim = c(15, 120), ylim = c(0, 3.2)) +labs(fill ="Ageing rate index",x ="e0",y ="h",title = study_title) +theme_bw() +theme(plot.title =element_text(hjust =0.5))}a <-ageing_axis_plot(Arya_2010_f, "Arya 2010 females")b <-ageing_axis_plot(Arya_2010_m, "Arya 2010 males")c <-ageing_axis_plot(Huang_2020_f_18, "Huang 18C females")d <-ageing_axis_plot(Huang_2020_m_18, "Huang 18C males")e <-ageing_axis_plot(Huang_2020_f_25, "Huang 25C females")f <-ageing_axis_plot(Huang_2020_m_25, "Huang 25C males")g <-ageing_axis_plot(Huang_2020_f_28, "Huang 28C females")h <-ageing_axis_plot(Huang_2020_m_28, "Huang 28C males")i <-ageing_axis_plot(Wilson_2020_f_1, "Wilson 25C females 1")j <-ageing_axis_plot(Wilson_2020_f_2, "Wilson 25C females 2")k <-ageing_axis_plot(Durham_2014_f_adjusted, "Durham 25C females")l <-ageing_axis_plot(Patel_2021_f, "Patel 23C females")(a | b | c) / (d | e | f) / (g | h| i) / (j | k | l) +#guide_area()) +plot_layout(guides ='collect')```## Run univariate GWAS```{r}Arya_f_ageing <-prep_for_ageing_GWAS(Arya_2010_f)Arya_m_ageing <-prep_for_ageing_GWAS(Arya_2010_m)Huang_f_18_ageing <-prep_for_ageing_GWAS(Huang_2020_f_18)Huang_m_18_ageing <-prep_for_ageing_GWAS(Huang_2020_m_18)Huang_f_25_ageing <-prep_for_ageing_GWAS(Huang_2020_f_25)Huang_m_25_ageing <-prep_for_ageing_GWAS(Huang_2020_m_25)Huang_f_28_ageing <-prep_for_ageing_GWAS(Huang_2020_f_28)Huang_m_28_ageing <-prep_for_ageing_GWAS(Huang_2020_m_28)Wilson_f_ageing_1 <-prep_for_ageing_GWAS(Wilson_2020_f_1)Wilson_f_ageing_2 <-prep_for_ageing_GWAS(Wilson_2020_f_2)Durham_f_ageing <-prep_for_ageing_GWAS(Durham_2014_f_adjusted)Patel_f_ageing <-prep_for_ageing_GWAS(Patel_2021_f)Arya_f_scaling <-prep_for_scaling_GWAS(Arya_2010_f)Arya_m_scaling <-prep_for_scaling_GWAS(Arya_2010_m)Huang_f_18_scaling <-prep_for_scaling_GWAS(Huang_2020_f_18)Huang_m_18_scaling <-prep_for_scaling_GWAS(Huang_2020_m_18)Huang_f_25_scaling <-prep_for_scaling_GWAS(Huang_2020_f_25)Huang_m_25_scaling <-prep_for_scaling_GWAS(Huang_2020_m_25)Huang_f_28_scaling <-prep_for_scaling_GWAS(Huang_2020_f_28)Huang_m_28_scaling <-prep_for_scaling_GWAS(Huang_2020_m_28)Wilson_f_scaling_1 <-prep_for_scaling_GWAS(Wilson_2020_f_1)Wilson_f_scaling_2 <-prep_for_scaling_GWAS(Wilson_2020_f_2)Durham_f_scaling <-prep_for_scaling_GWAS(Durham_2014_f_adjusted)Patel_f_scaling <-prep_for_scaling_GWAS(Patel_2021_f)if(!file.exists("data/Derived/GWAS_results/Arya_f_ageing.tsv.gz")) {run_GWAS(Arya_f_ageing)run_GWAS(Arya_m_ageing)run_GWAS(Huang_f_18_ageing)run_GWAS(Huang_m_18_ageing)run_GWAS(Huang_f_25_ageing)run_GWAS(Huang_m_25_ageing)run_GWAS(Huang_f_28_ageing)run_GWAS(Huang_m_28_ageing)run_GWAS(Wilson_f_ageing_1)run_GWAS(Wilson_f_ageing_2)run_GWAS(Durham_f_ageing)run_GWAS(Patel_f_ageing)run_GWAS(Arya_f_scaling)run_GWAS(Arya_m_scaling)run_GWAS(Huang_f_18_scaling)run_GWAS(Huang_m_18_scaling)run_GWAS(Huang_f_25_scaling)run_GWAS(Huang_m_25_scaling)run_GWAS(Huang_f_28_scaling)run_GWAS(Huang_m_28_scaling)run_GWAS(Wilson_f_scaling_1)run_GWAS(Wilson_f_scaling_2)run_GWAS(Durham_f_scaling)run_GWAS(Patel_f_scaling)}Arya_f_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Arya_f_ageing.tsv.gz") Arya_m_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Arya_m_ageing.tsv.gz") Huang_f_18_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_18_ageing.tsv.gz")Huang_m_18_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_18_ageing.tsv.gz")Huang_f_25_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_25_ageing.tsv.gz")Huang_m_25_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_25_ageing.tsv.gz")Huang_f_28_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_28_ageing.tsv.gz")Huang_m_28_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_28_ageing.tsv.gz")Wilson_f_ageing_1_GWAS <-read_tsv("data/Derived/GWAS_results/Wilson_f_ageing_1.tsv.gz")Wilson_f_ageing_2_GWAS <-read_tsv("data/Derived/GWAS_results/Wilson_f_ageing_2.tsv.gz")Durham_f_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Durham_f_ageing.tsv.gz")Patel_f_ageing_GWAS <-read_tsv("data/Derived/GWAS_results/Patel_f_ageing.tsv.gz")Arya_f_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Arya_f_scaling.tsv.gz") Arya_m_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Arya_m_scaling.tsv.gz") Huang_f_18_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_18_scaling.tsv.gz")Huang_m_18_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_18_scaling.tsv.gz")Huang_f_25_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_25_scaling.tsv.gz")Huang_m_25_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_25_scaling.tsv.gz")Huang_f_28_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_f_28_scaling.tsv.gz")Huang_m_28_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Huang_m_28_scaling.tsv.gz")Wilson_f_scaling_1_GWAS <-read_tsv("data/Derived/GWAS_results/Wilson_f_scaling_1.tsv.gz")Wilson_f_scaling_2_GWAS <-read_tsv("data/Derived/GWAS_results/Wilson_f_scaling_2.tsv.gz")Durham_f_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Durham_f_scaling.tsv.gz")Patel_f_scaling_GWAS <-read_tsv("data/Derived/GWAS_results/Patel_f_scaling.tsv.gz")```How many variants do we find that have notable associations with ageing rate?```{r}# filter down to sig associationsageing_axis_table <-bind_rows(Arya_f_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Arya_f_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Sex ="Female",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Arya_m_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Arya_m_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Sex ="Male",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_f_18_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_18_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Female",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_m_18_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_18_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Male",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_f_25_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_25_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_m_25_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_25_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Male",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_f_28_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_28_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Female",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_m_28_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_28_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Male",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Wilson_f_ageing_1_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Wilson_f_ageing_1_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Wilson et al 2020",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Wilson_f_ageing_2_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Wilson_f_ageing_2_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Wilson et al 2020",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Durham_f_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Durham_f_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Durham et al 2014",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Patel_f_ageing_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Patel_f_ageing_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Patel et al 2021",Sex ="Female",Temperature ="23",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?ageing_axis_p_05_SNPs <-bind_rows( Arya_f_ageing_GWAS %>%filter(P <1e-05), Arya_m_ageing_GWAS %>%filter(P <1e-05), Huang_f_18_ageing_GWAS %>%filter(P <1e-05), Huang_m_18_ageing_GWAS %>%filter(P <1e-05), Huang_f_25_ageing_GWAS %>%filter(P <1e-05), Huang_m_25_ageing_GWAS %>%filter(P <1e-05), Huang_f_28_ageing_GWAS %>%filter(P <1e-05), Huang_m_28_ageing_GWAS %>%filter(P <1e-05), Wilson_f_ageing_1_GWAS %>%filter(P <1e-05), Wilson_f_ageing_2_GWAS %>%filter(P <1e-05), Durham_f_ageing_GWAS %>%filter(P <1e-05), Patel_f_ageing_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()ageing_axis_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= ageing_axis_p_05_SNPs,`p < 1e-08 variants`=sum(ageing_axis_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()```What about scaling?```{r}# filter down to sig associationsscaling_axis_table <-bind_rows(Arya_f_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Arya_f_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Sex ="Female",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Arya_m_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Arya_m_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Arya et al 2010",Sex ="Male",Temperature ="25",`Mating status`="Virgin") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_f_18_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_18_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Female",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_m_18_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_18_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Male",Temperature ="18",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_f_25_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_25_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_m_25_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_25_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Male",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_f_28_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_f_28_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Female",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Huang_m_28_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Huang_m_28_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Huang et al 2020",Sex ="Male",Temperature ="28",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Wilson_f_scaling_1_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Wilson_f_scaling_1_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Wilson et al 2020",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Wilson_f_scaling_2_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Wilson_f_scaling_2_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Wilson et al 2020",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Durham_f_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Durham_f_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Durham et al 2014",Sex ="Female",Temperature ="25",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`),Patel_f_scaling_GWAS %>%filter(P <1e-05) %>%transmute(`p < 1e-05 variants`=n(),`p < 1e-08 variants`=nrow(filter(Patel_f_scaling_GWAS, P <1e-08))) %>%distinct() %>%mutate(Study ="Patel et al 2021",Sex ="Female",Temperature ="23",`Mating status`="Mated") %>% dplyr::select(Study, Sex, Temperature, `Mating status`, `p < 1e-05 variants`, `p < 1e-08 variants`)) # how many unique variants have been detected?scaling_axis_p_05_SNPs <-bind_rows( Arya_f_scaling_GWAS %>%filter(P <1e-05), Arya_m_scaling_GWAS %>%filter(P <1e-05), Huang_f_18_scaling_GWAS %>%filter(P <1e-05), Huang_m_18_scaling_GWAS %>%filter(P <1e-05), Huang_f_25_scaling_GWAS %>%filter(P <1e-05), Huang_m_25_scaling_GWAS %>%filter(P <1e-05), Huang_f_28_scaling_GWAS %>%filter(P <1e-05), Huang_m_28_scaling_GWAS %>%filter(P <1e-05), Wilson_f_scaling_1_GWAS %>%filter(P <1e-05), Wilson_f_scaling_2_GWAS %>%filter(P <1e-05), Durham_f_scaling_GWAS %>%filter(P <1e-05), Patel_f_scaling_GWAS %>%filter(P <1e-05)) %>%distinct(SNP) %>%nrow()scaling_axis_table %>%add_row(Study ="Totals",Temperature ="",`Mating status`="",`p < 1e-05 variants`= scaling_axis_p_05_SNPs,`p < 1e-08 variants`=sum(scaling_axis_table$`p < 1e-08 variants`)) %>%kable() %>%kable_styling()```## Run CPASSOC### Generate the genetic correlation matrixUsing SNP effect sizes, we calculate the genetic correlations between a) rates of ageing and b) scaling of mortality, measured in different environments.```{r}# use the BETA coefficients to build the SNP correlation matrix for the rate of ageingSNP_ageing_axis_data <-bind_rows( Arya_f_ageing_GWAS %>%mutate(Study ="Arya_2010", Temperature =25, Sex ="Female"), Arya_m_ageing_GWAS %>%mutate(Study ="Arya_2010", Temperature =25, Sex ="Male"), Huang_f_18_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =18, Sex ="Female"), Huang_m_18_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =18, Sex ="Male"), Huang_f_25_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =25, Sex ="Female"), Huang_m_25_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =25, Sex ="Male"), Huang_f_28_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =28, Sex ="Female"), Huang_m_28_ageing_GWAS %>%mutate(Study ="Huang_2020", Temperature =28, Sex ="Male"), Wilson_f_ageing_1_GWAS %>%mutate(Study ="Wilson_2020_1", Temperature =25, Sex ="Female"), Wilson_f_ageing_2_GWAS %>%mutate(Study ="Wilson_2020_2", Temperature =25, Sex ="Female"), Durham_f_ageing_GWAS %>%mutate(Study ="Durham_2014", Temperature =25, Sex ="Female"), Patel_f_ageing_GWAS %>%mutate(Study ="Patel_2021", Temperature =23, Sex ="Female")) %>% dplyr::select(SNP, BETA, Study, Temperature, Sex) %>%pivot_wider(values_from = BETA, names_from =c(Study, Temperature, Sex)) %>% dplyr::select(-SNP) SNP_ageing_axis_corr_matrix <-cor(SNP_ageing_axis_data, use ="pairwise.complete.obs", method ="spearman")# use the BETA coefficients to build the SNP correlation matrix for scalingSNP_scaling_axis_data <-bind_rows( Arya_f_scaling_GWAS %>%mutate(Study ="Arya_2010", Temperature =25, Sex ="Female"), Arya_m_scaling_GWAS %>%mutate(Study ="Arya_2010", Temperature =25, Sex ="Male"), Huang_f_18_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =18, Sex ="Female"), Huang_m_18_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =18, Sex ="Male"), Huang_f_25_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =25, Sex ="Female"), Huang_m_25_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =25, Sex ="Male"), Huang_f_28_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =28, Sex ="Female"), Huang_m_28_scaling_GWAS %>%mutate(Study ="Huang_2020", Temperature =28, Sex ="Male"), Wilson_f_scaling_1_GWAS %>%mutate(Study ="Wilson_2020_1", Temperature =25, Sex ="Female"), Wilson_f_scaling_2_GWAS %>%mutate(Study ="Wilson_2020_2", Temperature =25, Sex ="Female"), Durham_f_scaling_GWAS %>%mutate(Study ="Durham", Temperature =25, Sex ="Female"), Patel_f_scaling_GWAS %>%mutate(Study ="Patel", Temperature =23, Sex ="Female")) %>% dplyr::select(SNP, BETA, Study, Temperature, Sex) %>%pivot_wider(values_from = BETA, names_from =c(Study, Temperature, Sex)) %>% dplyr::select(-SNP) SNP_scaling_axis_corr_matrix <-cor(SNP_scaling_axis_data, use ="pairwise.complete.obs", method ="spearman")```### Calculate meta-analytic test statisticsThe purpose of these meta-analyses is to search for SNPs that affect 1) the rate of ageing and 2) the scaling of the risk of mortality. **Run CPASSOC for the rate of ageing**```{r}# rate of ageingageing_axis_Arya_f_T <- Arya_f_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_f = T)ageing_axis_Arya_m_T <- Arya_m_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_m = T)ageing_axis_Huang_f_18_T <- Huang_f_18_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_18 = T)ageing_axis_Huang_m_18_T <- Huang_m_18_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_18 = T)ageing_axis_Huang_f_25_T <- Huang_f_25_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_25 = T)ageing_axis_Huang_m_25_T <- Huang_m_25_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_25 = T)ageing_axis_Huang_f_28_T <- Huang_f_28_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_28 = T)ageing_axis_Huang_m_28_T <- Huang_m_28_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_28 = T)ageing_axis_Wilson_f_1_T <- Wilson_f_ageing_1_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_1 = T)ageing_axis_Wilson_f_2_T <- Wilson_f_ageing_2_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_2 = T)ageing_axis_Durham_f_T <- Durham_f_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Durham_f = T)ageing_axis_Patel_f_T <- Patel_f_ageing_GWAS %>% dplyr::select(SNP, T) %>%rename(Patel_f = T)ageing_axis_effect_sizes <- ageing_axis_Arya_f_T %>%inner_join(ageing_axis_Arya_m_T, by ="SNP") %>%inner_join(ageing_axis_Huang_f_18_T, by ="SNP") %>%inner_join(ageing_axis_Huang_m_18_T, by ="SNP") %>%inner_join(ageing_axis_Huang_f_25_T, by ="SNP") %>%inner_join(ageing_axis_Huang_m_25_T, by ="SNP") %>%inner_join(ageing_axis_Huang_f_28_T, by ="SNP") %>%inner_join(ageing_axis_Huang_m_28_T, by ="SNP") %>%inner_join(ageing_axis_Wilson_f_1_T, by ="SNP") %>%inner_join(ageing_axis_Wilson_f_2_T, by ="SNP") %>%inner_join(ageing_axis_Durham_f_T, by ="SNP") %>%inner_join(ageing_axis_Patel_f_T, by ="SNP") ageing_axis_effect_size_values <- ageing_axis_effect_sizes %>% dplyr::select(2:13)Sample_size_ageing_axis <-c(165, 165, 183, 183, 186, 186, 177, 177, 161, 161, 176, 193)if(!file.exists("data/Derived/GWAS_results/ageing_axis_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(ageing_axis_effect_size_values, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits e.g. if a SNP has a sex or environment opposite effect on lifespan# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix);S_het <-SHet(ageing_axis_effect_size_values, Sample_size_ageing_axis, SNP_ageing_axis_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)ageing_axis_meta_results <- ageing_axis_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(ageing_axis_meta_results, "data/Derived/GWAS_results/ageing_axis_meta_results.csv")} else ageing_axis_meta_results <-read_csv("data/Derived/GWAS_results/ageing_axis_meta_results.csv")```**Run CPASSOC for the scaling of mortality risk**```{r}# scalingscaling_axis_Arya_f_T <- Arya_f_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_f = T)scaling_axis_Arya_m_T <- Arya_m_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Arya_m = T)scaling_axis_Huang_f_18_T <- Huang_f_18_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_18 = T)scaling_axis_Huang_m_18_T <- Huang_m_18_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_18 = T)scaling_axis_Huang_f_25_T <- Huang_f_25_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_25 = T)scaling_axis_Huang_m_25_T <- Huang_m_25_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_25 = T)scaling_axis_Huang_f_28_T <- Huang_f_28_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_f_28 = T)scaling_axis_Huang_m_28_T <- Huang_m_28_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Huang_m_28 = T)scaling_axis_Wilson_f_1_T <- Wilson_f_scaling_1_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_1 = T)scaling_axis_Wilson_f_2_T <- Wilson_f_scaling_2_GWAS %>% dplyr::select(SNP, T) %>%rename(Wilson_f_2 = T)scaling_axis_Durham_f_T <- Durham_f_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Durham_f = T)scaling_axis_Patel_f_T <- Patel_f_scaling_GWAS %>% dplyr::select(SNP, T) %>%rename(Patel_f = T)scaling_axis_effect_sizes <- scaling_axis_Arya_f_T %>%inner_join(scaling_axis_Arya_m_T, by ="SNP") %>%inner_join(scaling_axis_Huang_f_18_T, by ="SNP") %>%inner_join(scaling_axis_Huang_m_18_T, by ="SNP") %>%inner_join(scaling_axis_Huang_f_25_T, by ="SNP") %>%inner_join(scaling_axis_Huang_m_25_T, by ="SNP") %>%inner_join(scaling_axis_Huang_f_28_T, by ="SNP") %>%inner_join(scaling_axis_Huang_m_28_T, by ="SNP") %>%inner_join(scaling_axis_Wilson_f_1_T, by ="SNP") %>%inner_join(scaling_axis_Wilson_f_2_T, by ="SNP") %>%inner_join(scaling_axis_Durham_f_T, by ="SNP") %>%inner_join(scaling_axis_Patel_f_T, by ="SNP") scaling_axis_effect_size_values <- scaling_axis_effect_sizes %>% dplyr::select(2:13)Sample_size_scaling_axis <-c(165, 165, 183, 183, 186, 186, 177, 177, 161, 161, 176, 193)if(!file.exists("data/Derived/GWAS_results/scaling_axis_meta_results.csv")) {# run the homogeneous effect meta-analysisS_hom <-SHom(scaling_axis_effect_size_values, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix)# calculate meta-p-values and bind the two together with the SNP namesp_S_hom <-pchisq(S_hom, df =1, ncp =0, lower.tail = F) %>%as_tibble() %>%bind_cols(S_hom) %>%rename(meta_p_hom = value, S_hom = ...2)# Calculate S_het, an extension of S_hom that improves power when the genetic effect sizes vary for different traits (e.g. if a SNP has a sex or enviornment opposite effect on lifespan)# estimate parameters of gamma distributionpara <-EstimateGamma(N =1E4, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix);S_het <-SHet(scaling_axis_effect_size_values, Sample_size_scaling_axis, SNP_scaling_axis_corr_matrix)# obtain P-values of S_Het using the estimated gamma parametersp_S_het <-pgamma(q = S_het-para[3], shape = para[1], scale = para[2], lower.tail = F) %>%as_tibble() %>%bind_cols(S_het) %>%rename(meta_p_het = value, S_het = ...2)scaling_axis_meta_results <- scaling_axis_effect_sizes %>%bind_cols(p_S_hom, p_S_het) # add the unadjusted p valueswrite_csv(scaling_axis_meta_results, "data/Derived/GWAS_results/scaling_axis_meta_results.csv")} else scaling_axis_meta_results <-read_csv("data/Derived/GWAS_results/scaling_axis_meta_results.csv")```### Visualise the resultsWe combine GWAS summary statistics calculated from ageing and scaling axis data measured across different contexts. It's possible that some SNPs have G x E interactions that would lead to a heterogeneous effect across phenotypes. We therefore utilise the `S_het` calculated p-values.First lets show the effect of CPASSOC on the number of variants found to be associated with the rate of ageing and the scaling of mortality risk.**Table XX**. Genotype to phenotype associations detected by univariate GWAS, for the ageing rate and scaling of mortality axes. The total row shows the number of unique candidate variants identified across all studies.```{r}tibble(Analysis =c("CPASSOC", "Univariate", "CPASSOC", "Univariate"),Trait =c("Ageing rate", "Ageing rate", "Scaling", "Scaling"),`p < 1e-05 variants`=c(sum(ageing_axis_meta_results$meta_p_het <1e-05), ageing_axis_p_05_SNPs,sum(scaling_axis_meta_results$meta_p_het <1e-05), scaling_axis_p_05_SNPs),`p < 1e-08 variants`=c(sum(ageing_axis_meta_results$meta_p_het <1e-08),sum(ageing_axis_table$`p < 1e-08 variants`),sum(scaling_axis_meta_results$meta_p_het <1e-08),sum(scaling_axis_table$`p < 1e-08 variants`))) %>%kable() %>%kable_styling()```**Table SX**...variants that affect our proxy for the rate of ageing.```{r}# join gene annotations with the list of analysed variants # note that some SNPs are associated with >1 gene, because the gene annotations overlap (I think) or the variant is close to multiple annotated genes. Others are not near any known genes, producing NAs.ageing_rate_genes <- ageing_axis_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^8, 5)/10^8,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)ageing_rate_genes %>%my_data_table()```**Table SX**...variants that affect our proxy for scaling.```{r}scaling_genes <- scaling_axis_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP, S_het, meta_p_het) %>%left_join(annotations %>%filter(distance.to.gene <=500)) %>%mutate(meta_p_het =round(meta_p_het*10^8, 5)/10^8,S_het =round(S_het, 3)) %>% dplyr::select(SNP, S_het, meta_p_het, FBID, gene_name, site.class, distance.to.gene)scaling_genes %>%my_data_table()```Now lets build some 'Manhattan plots' to show where these significant associations can be found:```{r, fig.width=11}#| column: pageageing_axis_results <- ageing_axis_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)scaling_axis_results <- scaling_axis_meta_results %>% dplyr::select(SNP, meta_p_hom, meta_p_het) %>%rename(P = meta_p_het)# plot the results using the manhattan plot custom function we defined earlierageing_axis_het_plot <-build_manhattan_plot(ageing_axis_results) +labs(title ="Ageing rate") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 12.5))scaling_axis_het_plot <-build_manhattan_plot(scaling_axis_results) +labs(title ="Scaling of mortality") +theme(plot.title =element_text(size =20, hjust =0.5)) +coord_cartesian(ylim =c(0, 12.5))ageing_axis_het_plot + scaling_axis_het_plot ```Plot the univariate effect sizes for each of the SNPs associated with the rate of ageing at the genome-wide significance threshold (p < $0.05^{-8}$) after CPASSOC.```{r, fig.height=7}SNP_heatmap_ageing_axis <- ageing_axis_meta_results %>%filter(meta_p_het <1e-08) %>%arrange(meta_p_het) %>% dplyr::select(1:13) row_name <- SNP_heatmap_ageing_axis$SNPSNP_heatmap_ageing_axis <- SNP_heatmap_ageing_axis %>% dplyr::select(-SNP) %>%as.matrix()rownames(SNP_heatmap_ageing_axis) <- row_namebreaksList <-seq(-5, 5, by =0.1)annotation_SNPs <- ageing_axis_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP) %>%mutate(Chromosome =case_when(str_detect(SNP, "2L") ~"2L",str_detect(SNP, "2R") ~"2R",str_detect(SNP, "3L") ~"3L",str_detect(SNP, "3R") ~"3R",str_detect(SNP, "X") ~"X"))annotation_studies <-tibble(Study =c("Arya_f","Arya_m","Huang_f_18","Huang_m_18","Huang_f_25","Huang_m_25","Huang_f_28","Huang_m_28","Wilson_f_1","Wilson_f_2","Durham_f_25","Patel_f_23"),Temperature =c("25","25","18","18","25","25","28","28","25","25","25","23")) %>%mutate(Sex =case_when(str_detect(Study, "_f") ~"Female",.default ="Male"),Mating =case_when(str_detect(Study, "Arya") ~"NO",str_detect(Study, "Huang") ~"Throughout life",str_detect(Study, "Wilson") ~"Early life",str_detect(Study, "Durham") ~"Throughout life",str_detect(Study, "Patel") ~"Early life"),Author =str_extract(Study, ".*(?=\\_)"),Author =str_remove(Author, "_f"),Author =str_remove(Author, "_m"))# create a study annotation column, need this to be a data.frame rather than a tibble for some reason Study_details <- annotation_studies %>%as.data.frame() %>% dplyr::select(Study, Temperature, Mating)my_categories <-data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],Mating = Study_details[, 3])my_colors <-list(Temperature =c("18"="#7bbcd5", # sailboat colours from pnw"23"="#d0e2af","25"="#f5db99","28"="#e89c81"),Mating =c("NO"="#f8e3d1", # Shuksan from pnw"Early life"="#d7b1c5","Throughout life"="#ac8eab"),Chromosome =c("2L"="#d8aedd", # lake colours from pnw"2R"="#cb74ad","3L"="#11c2b5","3R"="#72e1e1","X"="#fbcc74"))# create a SNP annotation columnSNP_details <- annotation_SNPs %>%as.data.frame()my_SNP_categories <-data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])my_col_names <-c("Arya et al females", "Arya et al males", "Huang et al females", "Huang et al males", "Huang et al females", "Huang et al males","Huang et al females", "Huang et al males", "Wilson et al females 1","Wilson et al females 2", "Durham et al females","Patel et al females")pheatmap(SNP_heatmap_ageing_axis, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =4, cutree_cols =4, angle_col =45, border_color ="white",annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,fontsize =8, labels_col = my_col_names)```Plot the univariate effect sizes for each of the SNPs associated with the scaling of mortality risk at the genome-wide significance threshold (p < $0.05^{-8}$) after CPASSOC.```{r, fig.height=9}SNP_heatmap_scaling_axis <- scaling_axis_meta_results %>%filter(meta_p_het <1e-08) %>%arrange(meta_p_het) %>% dplyr::select(1:13) row_name <- SNP_heatmap_scaling_axis$SNPSNP_heatmap_scaling_axis <- SNP_heatmap_scaling_axis %>% dplyr::select(-SNP) %>%as.matrix()rownames(SNP_heatmap_scaling_axis) <- row_namebreaksList <-seq(-5, 5, by =0.1)annotation_SNPs <- scaling_axis_meta_results %>%filter(meta_p_het <1e-08) %>% dplyr::select(SNP) %>%mutate(Chromosome =case_when(str_detect(SNP, "2L") ~"2L",str_detect(SNP, "2R") ~"2R",str_detect(SNP, "3L") ~"3L",str_detect(SNP, "3R") ~"3R",str_detect(SNP, "X") ~"X"))annotation_studies <-tibble(Study =c("Arya_f","Arya_m","Huang_f_18","Huang_m_18","Huang_f_25","Huang_m_25","Huang_f_28","Huang_m_28","Wilson_f_1","Wilson_f_2","Durham_f_25","Patel_f_23"),Temperature =c("25","25","18","18","25","25","28","28","25","25","25","23")) %>%mutate(Sex =case_when(str_detect(Study, "_f") ~"Female",.default ="Male"),Mating =case_when(str_detect(Study, "Arya") ~"NO",str_detect(Study, "Huang") ~"Throughout life",str_detect(Study, "Wilson") ~"Early life",str_detect(Study, "Durham") ~"Throughout life",str_detect(Study, "Patel") ~"Early life"),Author =str_extract(Study, ".*(?=\\_)"),Author =str_remove(Author, "_f"),Author =str_remove(Author, "_m"))# create a study annotation column, need this to be a data.frame rather than a tibble for some reason Study_details <- annotation_studies %>%as.data.frame() %>% dplyr::select(Study, Temperature, Mating)my_categories <-data.frame(row.names = Study_details[, 1], Temperature = Study_details[, 2],Mating = Study_details[, 3])my_colors <-list(Temperature =c("18"="#7bbcd5", # sailboat colours from pnw"23"="#d0e2af","25"="#f5db99","28"="#e89c81"),Mating =c("NO"="#f8e3d1", # Shuksan from pnw"Early life"="#d7b1c5","Throughout life"="#ac8eab"),Chromosome =c("2L"="#d8aedd", # lake colours from pnw"2R"="#cb74ad","3L"="#11c2b5","3R"="#72e1e1","X"="#fbcc74"))# create a SNP annotation columnSNP_details <- annotation_SNPs %>%as.data.frame()my_SNP_categories <-data.frame(row.names = SNP_details[, 1], Chromosome = SNP_details[, 2])my_col_names <-c("Arya et al females", "Arya et al males", "Huang et al females", "Huang et al males", "Huang et al females", "Huang et al males","Huang et al females", "Huang et al males", "Wilson et al females 1","Wilson et al females 2", "Durham et al females","Patel et al females")pheatmap(SNP_heatmap_scaling_axis, breaks = breaksList, main ="", legend_labels =c("-1", "-0.5", "0", "0.5", "Genetic correlation\n"),color =colorRampPalette(rev(met.brewer("Benedictus", direction =1)))(length(breaksList)),legend =TRUE, cutree_rows =5, cutree_cols =4, angle_col =45, border_color ="white",annotation_col = my_categories, annotation_colors = my_colors, annotation_row = my_SNP_categories,fontsize =8, labels_col = my_col_names)```There are several genomic regions where we detect multiple variants with very similar to identical effect sizes, indicating linkage disequilibrium. In these cases, the variant with the true causal effect can't be identified (assuming there is a true phenotypic effect).